50#define PATTERN_HTMLMETA "<meta\\s+http-equiv\\s*=\\s*\"?content-type\"?\\s+content\\s*=\\s*\"text/x?html;\\s*charset=([a-z0-9_-]+)\"\\s*/?>"
52#define PATTERN_CODING "coding[\t ]*[:=][\t ]*\"?([a-z0-9-]+)\"?[\t ]*"
62#define fill(Order, Group, Idx, Charset, Name) \
63 encodings[Idx].idx = Idx; \
64 encodings[Idx].order = Order; \
65 encodings[Idx].group = Group; \
66 encodings[Idx].charset = Charset; \
67 encodings[Idx].name = Name;
150 gboolean was_alpha = FALSE;
151 gboolean need_sep = FALSE;
157 if (g_ascii_toupper(*a) == g_ascii_toupper(*b) &&
158 ((is_alpha = g_ascii_isalpha(*a)) || g_ascii_isdigit(*a)))
162 if (! need_sep || (was_alpha != is_alpha))
166 was_alpha = is_alpha;
176 if (! g_ascii_isalnum(*a))
181 if (! g_ascii_isalnum(*b))
240 if (encoding !=
NULL)
277 g_return_val_if_fail(enc !=
NULL,
NULL);
281 return g_strdup_printf(
"%s (%s)", enc->
name, enc->
charset);
287 g_return_val_if_fail(enc !=
NULL,
NULL);
301 g_return_if_fail(charset !=
NULL);
314 gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(
radio_items[i]), TRUE);
326 GRegex *regex = g_regex_new(pattern, G_REGEX_CASELESS, 0, &
error);
337static gchar *
regex_match(GRegex *preg,
const gchar *buffer, gsize size)
339 gchar *encoding =
NULL;
346 size = MIN(size, 512);
348 if (g_regex_match_full(preg, buffer, size, 0, 0, &minfo,
NULL) &&
349 g_match_info_get_match_count(minfo) >= 2)
351 encoding = g_match_info_fetch(minfo, 1);
352 geany_debug(
"Detected encoding by regex search: %s", encoding);
354 SETPTR(encoding, g_utf8_strup(encoding, -1));
356 g_match_info_free(minfo);
364 const gchar *charset = user_data;
367 ! gtk_check_menu_item_get_active(menuitem) ||
385 g_return_if_fail(doc !=
NULL);
396 len = G_N_ELEMENTS(
pregs);
397 for (i = 0; i < len; i++)
399 g_regex_unref(
pregs[i]);
408 GCallback cb_func[2];
416 [
ASIAN] =
N_(
"_SE & SW Asian"),
436 for (guint i = 0; i < G_N_ELEMENTS(
encodings); i++)
439 for (guint k = 0; k < 2; k++)
449 submenus[i] = menu[k];
452 GtkWidget *item = gtk_menu_item_new_with_mnemonic(
_(
groups[i]));
453 submenus[i] = gtk_menu_new();
454 gtk_menu_item_set_submenu(GTK_MENU_ITEM(item), submenus[i]);
463 for (guint i = 0; i < G_N_ELEMENTS(
encodings); i++)
472 item = gtk_radio_menu_item_new_with_label(
group,
label);
473 group = gtk_radio_menu_item_get_group(GTK_RADIO_MENU_ITEM(item));
477 item = gtk_menu_item_new_with_label(
label);
478 gtk_widget_show(item);
480 g_signal_connect(item,
"activate", cb_func[k],
489 while (n_added < G_N_ELEMENTS(
encodings));
499 gboolean a_has_child = gtk_tree_model_iter_has_child(model, a);
500 gboolean b_has_child = gtk_tree_model_iter_has_child(model, b);
505 if (a_has_child != b_has_child)
506 return a_has_child ? -1 : 1;
508 gtk_tree_model_get(model, a, 1, &a_string, -1);
509 gtk_tree_model_get(model, b, 1, &b_string, -1);
510 cmp_res = strcmp(a_string, b_string);
520 GtkTreeIter iter_current, iter_westeuro, iter_easteuro, iter_eastasian,
521 iter_asian, iter_utf8, iter_middleeast;
522 GtkTreeIter *iter_parent;
525 store = gtk_tree_store_new(2, G_TYPE_INT, G_TYPE_STRING);
529 gtk_tree_store_append(store, &iter_current,
NULL);
533 gtk_tree_store_append(store, &iter_westeuro,
NULL);
534 gtk_tree_store_set(store, &iter_westeuro, 0, -1, 1,
_(
"West European"), -1);
535 gtk_tree_store_append(store, &iter_easteuro,
NULL);
536 gtk_tree_store_set(store, &iter_easteuro, 0, -1, 1,
_(
"East European"), -1);
537 gtk_tree_store_append(store, &iter_eastasian,
NULL);
538 gtk_tree_store_set(store, &iter_eastasian, 0, -1, 1,
_(
"East Asian"), -1);
539 gtk_tree_store_append(store, &iter_asian,
NULL);
540 gtk_tree_store_set(store, &iter_asian, 0, -1, 1,
_(
"SE & SW Asian"), -1);
541 gtk_tree_store_append(store, &iter_middleeast,
NULL);
542 gtk_tree_store_set(store, &iter_middleeast, 0, -1, 1,
_(
"Middle Eastern"), -1);
543 gtk_tree_store_append(store, &iter_utf8,
NULL);
544 gtk_tree_store_set(store, &iter_utf8, 0, -1, 1,
_(
"Unicode"), -1);
548 gchar *encoding_string;
554 case EASTASIAN: iter_parent = &iter_eastasian;
break;
555 case ASIAN: iter_parent = &iter_asian;
break;
557 case UNICODE: iter_parent = &iter_utf8;
break;
559 default: iter_parent =
NULL;
561 gtk_tree_store_append(store, &iter_current, iter_parent);
563 gtk_tree_store_set(store, &iter_current, 0, i, 1, encoding_string, -1);
564 g_free(encoding_string);
567 gtk_tree_sortable_set_sort_column_id(GTK_TREE_SORTABLE(store), 1, GTK_SORT_ASCENDING);
577 gtk_tree_model_get(GTK_TREE_MODEL(store), iter, 0, &enc, -1);
584 if (gtk_tree_model_get_iter_first(GTK_TREE_MODEL(store), iter))
598 GtkCellRenderer *cell,
599 GtkTreeModel *tree_model,
603 gboolean sensitive = !gtk_tree_model_iter_has_child(tree_model, iter);
606 gtk_tree_model_get(tree_model, iter, 1, &
text, -1);
607 g_object_set(cell,
"sensitive", sensitive,
"text",
text,
NULL);
626 const gchar *charset, gboolean fast)
628 gchar *utf8_content =
NULL;
629 GError *conv_error =
NULL;
630 gchar* converted_contents =
NULL;
633 g_return_val_if_fail(buffer !=
NULL,
NULL);
634 g_return_val_if_fail(charset !=
NULL,
NULL);
636 converted_contents = g_convert(buffer, size,
"UTF-8", charset,
NULL,
637 &bytes_written, &conv_error);
641 utf8_content = converted_contents;
642 if (conv_error !=
NULL) g_error_free(conv_error);
644 else if (conv_error !=
NULL || ! g_utf8_validate(converted_contents, bytes_written,
NULL))
646 if (conv_error !=
NULL)
648 geany_debug(
"Couldn't convert from %s to UTF-8 (%s).", charset, conv_error->message);
649 g_error_free(conv_error);
653 geany_debug(
"Couldn't convert from %s to UTF-8.", charset);
656 g_free(converted_contents);
660 geany_debug(
"Converted from %s to UTF-8.", charset);
661 utf8_content = converted_contents;
672 for (i = 0; i < G_N_ELEMENTS(
pregs); i++)
684 const gchar *suggested_charset, gchar **used_encoding)
686 const gchar *locale_charset =
NULL;
687 const gchar *charset;
689 gboolean check_suggestion = suggested_charset !=
NULL;
690 gboolean check_locale = FALSE;
691 gint i, preferred_charset;
695 size = strlen(buffer);
699 check_locale = ! g_get_charset(&locale_charset);
705 preferred_charset < 0 ||
708 preferred_charset = -1;
717 if (check_suggestion)
719 check_suggestion = FALSE;
722 charset = suggested_charset;
725 else if (check_locale)
727 check_locale = FALSE;
728 charset = locale_charset;
733 if (preferred_charset >= 0)
736 geany_debug(
"Using preferred charset: %s", charset);
746 if (G_UNLIKELY(charset ==
NULL))
749 geany_debug(
"Trying to convert %" G_GSIZE_FORMAT
" bytes of data from %s into UTF-8.",
753 if (G_LIKELY(utf8_content !=
NULL))
755 if (used_encoding !=
NULL)
757 if (G_UNLIKELY(*used_encoding !=
NULL))
760 g_free(*used_encoding);
762 *used_encoding = g_strdup(charset);
786 gchar *regex_charset;
792 g_free(regex_charset);
808 if ((guchar)
string[0] == 0xef && (guchar)
string[1] == 0xbb &&
809 (guchar)
string[2] == 0xbf)
819 if ((guchar)
string[0] == 0x00 && (guchar)
string[1] == 0x00 &&
820 (guchar)
string[2] == 0xfe && (guchar)
string[3] == 0xff)
824 if ((guchar)
string[0] == 0xff && (guchar)
string[1] == 0xfe &&
825 (guchar)
string[2] == 0x00 && (guchar)
string[3] == 0x00)
829 if ((
string[0] == 0x2b &&
string[1] == 0x2f &&
string[2] == 0x76) &&
830 (
string[3] == 0x38 ||
string[3] == 0x39 ||
string[3] == 0x2b ||
string[3] == 0x2f))
840 if ((guchar)
string[0] == 0xfe && (guchar)
string[1] == 0xff)
844 if ((guchar)
string[0] == 0xff && (guchar)
string[1] == 0xfe)
857 if (
string !=
NULL &&
858 (strncmp(
string,
"UTF", 3) == 0 || strncmp(
string,
"UCS", 3) == 0))
885 if (! g_utf8_validate(buffer->
data, buffer->
len,
NULL))
893 buffer->
data, buffer->
size, forced_enc, FALSE);
894 if (converted_text ==
NULL)
901 buffer->
len = strlen(converted_text);
906 buffer->
enc = g_strdup(forced_enc);
915 g_return_val_if_fail(buffer->
enc ==
NULL, FALSE);
916 g_return_val_if_fail(buffer->
bom == FALSE, FALSE);
918 if (buffer->
size == 0)
922 buffer->
enc = g_strdup(
"UTF-8");
936 if (converted_text !=
NULL)
939 buffer->
len = strlen(converted_text);
959 buffer->
enc = g_strdup(
"UTF-8");
965 buffer->
size, regex_charset, &buffer->
enc);
967 if (converted_text ==
NULL)
969 g_free(regex_charset);
973 buffer->
len = strlen(converted_text);
975 g_free(regex_charset);
988 g_return_if_fail(bom_len != 0);
991 buffer->
len -= bom_len;
993 memmove(buffer->
data, buffer->
data + bom_len, buffer->
len + 1);
994 buffer->
data = g_realloc(buffer->
data, buffer->
len + 1);
1010 if (buffer->
len != buffer->
size && buffer->
size != 0 && (
1018 if (forced_enc !=
NULL)
1023 buffer->
bom = FALSE;
1058 gchar **used_encoding, gboolean *has_bom, gboolean *partial)
1063 buffer.
size = *size;
1065 buffer.
len = strlen(buffer.
data);
1075 *used_encoding = buffer.
enc;
1079 *has_bom = buffer.
bom;
static const gchar * groups[GEANY_GBG_COUNT]
GeanyDocument * document_get_current(void)
Finds the current document.
void document_undo_add(GeanyDocument *doc, guint type, gpointer data)
void document_set_encoding(GeanyDocument *doc, const gchar *new_encoding)
Sets the encoding of a document.
GeanyFilePrefs file_prefs
gboolean document_reload_prompt(GeanyDocument *doc, const gchar *forced_enc)
gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *forced_enc, gchar **used_encoding, gboolean *has_bom, gboolean *partial)
gchar * encodings_convert_to_utf8_from_charset(const gchar *buffer, gssize size, const gchar *charset, gboolean fast)
Tries to convert buffer into UTF-8 encoding from the encoding specified with charset.
void encodings_encoding_store_cell_data_func(GtkCellLayout *cell_layout, GtkCellRenderer *cell, GtkTreeModel *tree_model, GtkTreeIter *iter, gpointer data)
static gboolean encodings_charset_equals(const gchar *a, const gchar *b)
static gint encoding_combo_store_sort_func(GtkTreeModel *model, GtkTreeIter *a, GtkTreeIter *b, gpointer data)
static gchar * encodings_check_regexes(const gchar *buffer, gsize size)
static void handle_bom(BufferData *buffer)
void encodings_finalize(void)
GeanyEncoding encodings[GEANY_ENCODINGS_MAX]
static void encodings_radio_item_change_cb(GtkCheckMenuItem *menuitem, gpointer user_data)
static gchar * regex_match(GRegex *preg, const gchar *buffer, gsize size)
GeanyEncodingIndex encodings_scan_unicode_bom(const gchar *string, gsize len, guint *bom_len)
const gchar * encodings_get_charset_from_index(gint idx)
Gets the character set name of the specified index e.g.
GeanyEncodingIndex encodings_get_idx_from_charset(const gchar *charset)
static gboolean handle_forced_encoding(BufferData *buffer, const gchar *forced_enc)
gboolean encodings_encoding_store_get_iter(GtkTreeStore *store, GtkTreeIter *iter, gint enc)
static GtkWidget * radio_items[GEANY_ENCODINGS_MAX]
static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc)
static const gchar * encodings_normalize_charset(const gchar *charset)
static gchar * encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gssize size, const gchar *suggested_charset, gchar **used_encoding)
void encodings_init(void)
gint encodings_encoding_store_get_encoding(GtkTreeStore *store, GtkTreeIter *iter)
gchar * encodings_to_string(const GeanyEncoding *enc)
gboolean encodings_is_unicode_charset(const gchar *string)
static void encodings_reload_radio_item_change_cb(GtkMenuItem *menuitem, gpointer user_data)
gchar * encodings_convert_to_utf8(const gchar *buffer, gssize size, gchar **used_encoding)
Tries to convert buffer into UTF-8 encoding and store the detected original encoding in used_encoding...
static gboolean handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
const GeanyEncoding * encodings_get_from_index(gint idx)
static GRegex * regex_compile(const gchar *pattern)
static void init_encodings(void)
static gboolean pregs_loaded
#define fill(Order, Group, Idx, Charset, Name)
void encodings_select_radio_item(const gchar *charset)
GtkTreeStore * encodings_encoding_store_new(gboolean has_detect)
const GeanyEncoding * encodings_get_from_charset(const gchar *charset)
const gchar * encodings_get_charset(const GeanyEncoding *enc)
Encoding conversion and Byte Order Mark (BOM) handling.
GeanyEncodingIndex
List of known and supported encodings.
@ GEANY_ENCODING_UTF_32BE
@ GEANY_ENCODING_ISO_8859_6
@ GEANY_ENCODING_ISO_8859_14
@ GEANY_ENCODING_BIG5_HKSCS
@ GEANY_ENCODING_ISO_8859_8
@ GEANY_ENCODING_ISO_8859_1
@ GEANY_ENCODING_ISO_2022_KR
@ GEANY_ENCODING_WINDOWS_1254
@ GEANY_ENCODING_ISO_IR_111
@ GEANY_ENCODING_ISO_8859_4
@ GEANY_ENCODING_SHIFT_JIS
@ GEANY_ENCODING_WINDOWS_1253
@ GEANY_ENCODING_ISO_8859_7
@ GEANY_ENCODING_WINDOWS_1251
@ GEANY_ENCODING_UTF_32LE
@ GEANY_ENCODING_ISO_8859_5
@ GEANY_ENCODING_ISO_8859_2
@ GEANY_ENCODING_UTF_16LE
@ GEANY_ENCODING_ISO_8859_9
@ GEANY_ENCODING_ISO_8859_16
@ GEANY_ENCODING_WINDOWS_1256
@ GEANY_ENCODING_ISO_8859_3
@ GEANY_ENCODING_ISO_8859_13
@ GEANY_ENCODING_ISO_2022_JP
@ GEANY_ENCODING_ISO_8859_10
@ GEANY_ENCODING_WINDOWS_1257
@ GEANY_ENCODING_ARMSCII_8
@ GEANY_ENCODING_WINDOWS_1252
@ GEANY_ENCODING_WINDOWS_1258
@ GEANY_ENCODING_UTF_16BE
@ GEANY_ENCODING_WINDOWS_1255
@ GEANY_ENCODING_WINDOWS_1250
@ GEANY_ENCODING_ISO_8859_15
@ GEANY_ENCODING_ISO_8859_8_I
@ GEANY_ENCODING_GROUPS_MAX
void error(const errorSelection selection, const char *const format,...)
void geany_debug(gchar const *format,...)
gtk_container_add(GTK_CONTAINER(dialog->vbox), check_button)
gtk_widget_show_all(dialog)
Structure for representing an open tab with all its properties.
gchar * encoding
The encoding of the document, must be a valid string representation of an encoding,...
gboolean readonly
Whether this document is read-only.
gint default_open_encoding
GtkWidget * window
Main window.
Defines internationalization macros.
gboolean ui_tree_model_iter_any_next(GtkTreeModel *model, GtkTreeIter *iter, gboolean down)
GeanyMainWidgets main_widgets
GtkWidget * ui_lookup_widget(GtkWidget *widget, const gchar *widget_name)
Returns a widget from a name in a component, usually created by Glade.
User Interface general utility functions.
gboolean utils_str_equal(const gchar *a, const gchar *b)
NULL-safe string comparison.
General utility functions, non-GTK related.
#define SETPTR(ptr, result)
Assigns result to ptr, then frees the old value.