geany  1.38
About: Geany is a text editor (using GTK2) with basic features of an integrated development environment (syntax highlighting, code folding, symbol name auto-completion, ...). F: office T: editor programming GTK+ IDE
  Fossies Dox: geany-1.38.tar.bz2  ("unofficial" and yet experimental doxygen-generated source code documentation)  

parse.c
Go to the documentation of this file.
1/*
2* Copyright (c) 1996-2003, Darren Hiebert
3*
4* This source code is released for free distribution under the terms of the
5* GNU General Public License version 2 or (at your option) any later version.
6*
7* This module contains functions for managing input languages and
8* dispatching files to the appropriate language parser.
9*/
10
11/*
12* INCLUDE FILES
13*/
14#include "general.h" /* must always come first */
15
16/* TODO: This definition should be removed. */
17#define OPTION_WRITE
18#include "options_p.h"
19
20#include <string.h>
21
22#include "ctags.h"
23#include "debug.h"
24#include "entry_p.h"
25#include "field_p.h"
26#include "flags_p.h"
27#include "htable.h"
28#include "keyword.h"
29#include "lxpath_p.h"
30#include "param.h"
31#include "param_p.h"
32#include "parse_p.h"
33#include "parsers_p.h"
34#include "promise.h"
35#include "promise_p.h"
36#include "ptag_p.h"
37#include "ptrarray.h"
38#include "read.h"
39#include "read_p.h"
40#include "routines.h"
41#include "routines_p.h"
42#include "stats_p.h"
43#include "subparser.h"
44#include "subparser_p.h"
45#include "trace.h"
46#include "trashbox.h"
47#include "trashbox_p.h"
48#include "vstring.h"
49#ifdef HAVE_ICONV
50# include "mbcs_p.h"
51#endif
52#include "writer_p.h"
53#include "xtag_p.h"
54
55/*
56 * DATA TYPES
57 */
64};
/* Printable names of the spec types; verboseReportCandidate() indexes this
 * table with a candidate's specType when printing verbose output. */
const char *specTypeName [] = {
	"none",
	"name",
	"extension",
	"pattern",
};
68
69typedef struct {
71 const char* spec;
74
75typedef struct sParserObject {
77
79
80 stringList* currentPatterns; /* current list of file name patterns */
81 stringList* currentExtensions; /* current list of extensions */
82 stringList* currentAliases; /* current list of aliases */
83
84 unsigned int initialized:1; /* initialize() is called or not */
85 unsigned int dontEmit:1; /* run but don't emit tags.
86 This parser was disabled but a subparser on
87 this parser makes this parser run (to drive
88 the subparser). */
89 unsigned int pseudoTagPrinted:1; /* pseudo tags about this parser
90 is emitted or not. */
91 unsigned int used; /* Used for printing language specific statistics. */
92
93 unsigned int anonymousIdentiferId; /* managed by anon* functions */
94
98
99 langType pretendingAsLanguage; /* OLDLANG in --_pretend-<NEWLANG>=<OLDLANG>
100 is set here if this parser is NEWLANG.
101 LANG_IGNORE is set if no pretending. */
102 langType pretendedAsLanguage; /* NEWLANG in --_pretend-<NEWLANG>=<OLDLANG>
103 is set here if this parser is OLDLANG.
104 LANG_IGNORE is set if no being pretended. */
105
107
108/*
109 * FUNCTION PROTOTYPES
110 */
111
112static void lazyInitialize (langType language);
113static void addParserPseudoTags (langType language);
114static void installKeywordTable (const langType language);
115static void installTagRegexTable (const langType language);
116static void installTagXpathTable (const langType language);
117static void anonResetMaybe (parserObject *parser);
118static void setupAnon (void);
119static void teardownAnon (void);
120static void uninstallTagXpathTable (const langType language);
121
122/*
123* DATA DEFINITIONS
124*/
125static parserDefinition *FallbackParser (void);
126static parserDefinition *CTagsParser (void);
129#ifdef EXTERNAL_PARSER_LIST
131#else /* ! EXTERNAL_PARSER_LIST */
132 CTagsParser, /* This must be first entry. */
133 FallbackParser, /* LANG_FALLBACK */
135
138#ifdef HAVE_LIBXML
139 ,
140#endif
142#ifdef HAVE_LIBYAML
143 ,
144#endif
146#ifdef HAVE_PACKCC
147 ,
148#endif
149#endif /* EXTERNAL_PARSER_LIST */
150};
152static unsigned int LanguageCount = 0;
155 .enabled = false,
156 .letter = KIND_FILE_DEFAULT_LETTER,
158 .description = KIND_FILE_DEFAULT_NAME,
159};
160
161/*
162* FUNCTION DEFINITIONS
163*/
164
/*
 * Tells whether C may be part of a language (mode) name.
 *
 * Returns true for characters that isgraph() accepts, except the single
 * quote, the double quote and the semicolon; returns false for everything
 * else.
 */
static bool isLanguageNameChar(int c)
{
	/* The <ctype.h> classifiers are only defined for EOF and for values
	 * representable as unsigned char.  Callers hand in plain char values,
	 * which are negative for high-bit bytes where char is signed, so
	 * normalize before classifying. */
	if (! isgraph ((unsigned char) c))
		return false;

	return !(c == '\'' || c == '"' || c == ';');
}
176
177extern unsigned int countParsers (void)
178{
179 return LanguageCount;
180}
181
182extern int makeSimpleTag (
183 const vString* const name, const int kindIndex)
184{
185 return makeSimpleRefTag (name, kindIndex, ROLE_DEFINITION_INDEX);
186}
187
188extern int makeSimpleRefTag (const vString* const name, const int kindIndex,
189 int roleIndex)
190{
191 int r = CORK_NIL;
192
193 Assert (roleIndex < (int)countInputLanguageRoles(kindIndex));
194
195 /* do not check for kind being disabled - that happens later in makeTagEntry() */
196 if (name != NULL && vStringLength (name) > 0)
197 {
198 tagEntryInfo e;
199 initRefTagEntry (&e, vStringValue (name), kindIndex, roleIndex);
200
201 r = makeTagEntry (&e);
202 }
203 return r;
204}
205
206extern int makeSimplePlaceholder(const vString* const name)
207{
209}
210
211extern bool isLanguageEnabled (const langType language)
212{
213 const parserDefinition* const lang = LanguageTable [language].def;
214
215 if (!lang->enabled)
216 return false;
217
218 if ((lang->kindTable == NULL) &&
219 (!(lang->method & METHOD_REGEX)) &&
220 (!(lang->method & METHOD_XPATH)))
221 return false;
222 else
223 return true;
224}
225
226extern bool isLanguageVisible (const langType language)
227{
228 const parserDefinition* const lang = LanguageTable [language].def;
229
230 return !lang->invisible;
231}
232
233/*
234* parserDescription mapping management
235*/
236
237extern parserDefinition* parserNew (const char* name)
238{
240 result->name = eStrdup (name);
241
242 result->enabled = true;
243 return result;
244}
245
246extern bool doesLanguageAllowNullTag (const langType language)
247{
248 Assert (0 <= language && language < (int) LanguageCount);
249 return LanguageTable [language].def->allowNullTag;
250}
251
252extern bool doesLanguageRequestAutomaticFQTag (const langType language)
253{
254 Assert (0 <= language && language < (int) LanguageCount);
255 return LanguageTable [language].def->requestAutomaticFQTag;
256}
257
258static const char *getLanguageNameFull (const langType language, bool noPretending)
259{
260 const char* result;
261
262 if (language == LANG_IGNORE)
263 result = "unknown";
264 else
265 {
266 Assert (0 <= language && language < (int) LanguageCount);
267 if (noPretending)
268 result = LanguageTable [language].def->name;
269 else
270 {
271 langType real_language = LanguageTable [language].pretendingAsLanguage;
272 if (real_language == LANG_IGNORE)
273 result = LanguageTable [language].def->name;
274 else
275 {
276 Assert (0 <= real_language && real_language < (int) LanguageCount);
277 result = LanguageTable [real_language].def->name;
278 }
279 }
280 }
281 return result;
282}
283
284extern const char *getLanguageName (const langType language)
285{
286 return getLanguageNameFull (language, false);
287}
288
289extern const char *getLanguageKindName (const langType language, const int kindIndex)
290{
291 kindDefinition* kdef = getLanguageKind (language, kindIndex);
292 return kdef->name;
293}
294
297 .name = KIND_GHOST_NAME,
298 .description = KIND_GHOST_NAME,
299};
300
301extern int defineLanguageKind (const langType language, kindDefinition *def,
302 freeKindDefFunc freeKindDef)
303{
304 return defineKind (LanguageTable [language].kindControlBlock, def, freeKindDef);
305}
306
307extern unsigned int countLanguageKinds (const langType language)
308{
309 return countKinds (LanguageTable [language].kindControlBlock);
310}
311
312extern unsigned int countLanguageRoles (const langType language, int kindIndex)
313{
314 return countRoles (LanguageTable [language].kindControlBlock, kindIndex);
315}
316
317extern kindDefinition* getLanguageKind (const langType language, int kindIndex)
318{
319 kindDefinition* kdef;
320
321 Assert (0 <= language && language < (int) LanguageCount);
322
323 switch (kindIndex)
324 {
325 case KIND_FILE_INDEX:
326 kdef = LanguageTable [language].fileKind;
327 break;
328 case KIND_GHOST_INDEX:
329 kdef = &kindGhost;
330 break;
331 default:
332 Assert (kindIndex >= 0);
333 kdef = getKind (LanguageTable [language].kindControlBlock, kindIndex);
334 }
335 return kdef;
336}
337
338extern kindDefinition* getLanguageKindForLetter (const langType language, char kindLetter)
339{
340 Assert (0 <= language && language < (int) LanguageCount);
341 if (kindLetter == LanguageTable [language].fileKind->letter)
342 return LanguageTable [language].fileKind;
343 else if (kindLetter == KIND_GHOST_LETTER)
344 return &kindGhost;
345 else
346 return getKindForLetter (LanguageTable [language].kindControlBlock, kindLetter);
347}
348
349extern kindDefinition* getLanguageKindForName (const langType language, const char *kindName)
350{
351 Assert (0 <= language && language < (int) LanguageCount);
352 Assert (kindName);
353
354 if (strcmp(kindName, LanguageTable [language].fileKind->name) == 0)
355 return LanguageTable [language].fileKind;
356 else if (strcmp(kindName, KIND_GHOST_NAME) == 0)
357 return &kindGhost;
358 else
359 return getKindForName (LanguageTable [language].kindControlBlock, kindName);
360}
361
362extern roleDefinition* getLanguageRole(const langType language, int kindIndex, int roleIndex)
363{
364 return getRole (LanguageTable [language].kindControlBlock, kindIndex, roleIndex);
365}
366
367extern roleDefinition* getLanguageRoleForName (const langType language, int kindIndex,
368 const char *roleName)
369{
370 return getRoleForName (LanguageTable [language].kindControlBlock, kindIndex, roleName);
371}
372
373extern langType getNamedLanguageFull (const char *const name, size_t len, bool noPretending)
374{
375 langType result = LANG_IGNORE;
376 unsigned int i;
377 Assert (name != NULL);
378
379 if (len == 0)
380 {
382 if (def)
383 result = def->id;
384 }
385 else
386 for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i)
387 {
388 const parserDefinition* const lang = LanguageTable [i].def;
389 Assert (lang->name);
390 vString* vstr = vStringNewInit (name);
391 vStringTruncate (vstr, len);
392
393 if (strcasecmp (vStringValue (vstr), lang->name) == 0)
394 result = i;
395 vStringDelete (vstr);
396 }
397
398 if (result != LANG_IGNORE
399 && (!noPretending)
400 && LanguageTable [result].pretendedAsLanguage != LANG_IGNORE)
401 result = LanguageTable [result].pretendedAsLanguage;
402
403 return result;
404}
405
406extern langType getNamedLanguage (const char *const name, size_t len)
407{
408 return getNamedLanguageFull (name, len, false);
409}
410
411static langType getNameOrAliasesLanguageAndSpec (const char *const key, langType start_index,
412 const char **const spec, enum specType *specType)
413{
414 langType result = LANG_IGNORE;
415 unsigned int i;
416
417
418 if (start_index == LANG_AUTO)
419 start_index = 0;
420 else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount)
421 return result;
422
423 for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i)
424 {
425 const parserObject* const parser = LanguageTable + i;
426 stringList* const aliases = parser->currentAliases;
427 vString* tmp;
428
429 /* isLanguageEnabled is not used here.
430 It calls initializeParser which takes
431 cost. */
432 if (! parser->def->enabled)
433 continue;
434
435 if (parser->def->name != NULL && strcasecmp (key, parser->def->name) == 0)
436 {
437 result = i;
438 *spec = parser->def->name;
440 }
441 else if (aliases != NULL && (tmp = stringListFileFinds (aliases, key)))
442 {
443 result = i;
444 *spec = vStringValue(tmp);
446 }
447 }
448 return result;
449}
450
451extern langType getLanguageForCommand (const char *const command, langType startFrom)
452{
453 const char *const tmp_command = baseFilename (command);
454 char *tmp_spec;
455 enum specType tmp_specType;
456
457 return getNameOrAliasesLanguageAndSpec (tmp_command, startFrom,
458 (const char **const)&tmp_spec,
459 &tmp_specType);
460}
461
462static langType getPatternLanguageAndSpec (const char *const baseName, langType start_index,
463 const char **const spec, enum specType *specType)
464{
465 langType result = LANG_IGNORE;
466 unsigned int i;
467
468 if (start_index == LANG_AUTO)
469 start_index = 0;
470 else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount)
471 return result;
472
473 *spec = NULL;
474 for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i)
475 {
476 parserObject *parser = LanguageTable + i;
477 stringList* const ptrns = parser->currentPatterns;
478 vString* tmp;
479
480 /* isLanguageEnabled is not used here.
481 It calls initializeParser which takes
482 cost. */
483 if (! parser->def->enabled)
484 continue;
485
486 if (ptrns != NULL && (tmp = stringListFileFinds (ptrns, baseName)))
487 {
488 result = i;
489 *spec = vStringValue(tmp);
491 goto found;
492 }
493 }
494
495 for (i = start_index ; i < LanguageCount && result == LANG_IGNORE ; ++i)
496 {
497 parserObject *parser = LanguageTable + i;
498 stringList* const exts = parser->currentExtensions;
499 vString* tmp;
500
501 /* isLanguageEnabled is not used here.
502 It calls initializeParser which takes
503 cost. */
504 if (! parser->def->enabled)
505 continue;
506
507 if (exts != NULL && (tmp = stringListExtensionFinds (exts,
508 fileExtension (baseName))))
509 {
510 result = i;
511 *spec = vStringValue(tmp);
513 goto found;
514 }
515 }
516found:
517 return result;
518}
519
520extern langType getLanguageForFilename (const char *const filename, langType startFrom)
521{
522 const char *const tmp_filename = baseFilename (filename);
523 char *tmp_spec;
524 enum specType tmp_specType;
525
526 return getPatternLanguageAndSpec (tmp_filename, startFrom,
527 (const char **const)&tmp_spec,
528 &tmp_specType);
529}
530
531const char *scopeSeparatorFor (langType language, int kindIndex, int parentKindIndex)
532{
533 Assert (0 <= language && language < (int) LanguageCount);
534
535 parserObject *parser = LanguageTable + language;
536 struct kindControlBlock *kcb = parser->kindControlBlock;
537
538 const scopeSeparator *sep = getScopeSeparator (kcb, kindIndex, parentKindIndex);
539 return sep? sep->separator: NULL;
540}
541
542static bool processLangDefineScopesep(const langType language,
543 const char *const option,
544 const char *const parameter)
545{
546 parserObject *parser;
547 const char * p = parameter;
548
549
550 char parentKletter;
551 int parentKindex = KIND_FILE_INDEX;
552 char kletter;
553 int kindex = KIND_FILE_INDEX;
554 const char *separator;
555
556 Assert (0 <= language && language < (int) LanguageCount);
557 parser = LanguageTable + language;
558
559
560 /*
561 * Parent
562 */
563 parentKletter = p[0];
564
565 if (parentKletter == '\0')
566 error (FATAL, "no scope separator specified in \"--%s\" option", option);
567 else if (parentKletter == '/')
568 parentKindex = KIND_GHOST_INDEX;
569 else if (parentKletter == KIND_WILDCARD_LETTER)
570 parentKindex = KIND_WILDCARD_INDEX;
571 else if (parentKletter == KIND_FILE_DEFAULT_LETTER)
572 error (FATAL,
573 "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind and no separator can be assigned to",
575 else if (isalpha (parentKletter))
576 {
577 kindDefinition *kdef = getKindForLetter (parser->kindControlBlock, parentKletter);
578 if (kdef == NULL)
579 error (FATAL,
580 "the kind for letter `%c' specified in \"--%s\" option is not defined.",
581 parentKletter, option);
582 parentKindex = kdef->id;
583 }
584 else
585 error (FATAL,
586 "the kind letter `%c` given in \"--%s\" option is not an alphabet",
587 parentKletter, option);
588
589
590 /*
591 * Child
592 */
593 if (parentKindex == KIND_GHOST_INDEX)
594 kletter = p[1];
595 else
596 {
597 if (p[1] != '/')
598 error (FATAL,
599 "wrong separator specification in \"--%s\" option: no slash after parent kind letter `%c'",
600 option, parentKletter);
601 kletter = p[2];
602 }
603
604 if (kletter == '\0')
605 error (FATAL, "no child kind letter in \"--%s\" option", option);
606 else if (kletter == '/')
607 error (FATAL,
608 "wrong separator specification in \"--%s\" option: don't specify slash char twice: %s",
609 option, parameter);
610 else if (kletter == ':')
611 error (FATAL,
612 "no child kind letter in \"--%s\" option", option);
613 else if (kletter == KIND_WILDCARD_LETTER)
614 {
615 if (parentKindex != KIND_WILDCARD_INDEX
616 && parentKindex != KIND_GHOST_INDEX)
617 error (FATAL,
618 "cannot use wild card for child kind unless parent kind is also wild card or empty");
619 kindex = KIND_WILDCARD_INDEX;
620 }
621 else if (kletter == KIND_FILE_DEFAULT_LETTER)
622 error (FATAL,
623 "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind and no separator can be assigned to",
625 else if (isalpha (kletter))
626 {
627 kindDefinition *kdef = getKindForLetter (parser->kindControlBlock, kletter);
628 if (kdef == NULL)
629 error (FATAL,
630 "the kind for letter `%c' specified in \"--%s\" option is not defined.",
631 kletter, option);
632 kindex = kdef->id;
633 }
634 else
635 error (FATAL,
636 "the kind letter `%c` given in \"--%s\" option is not an alphabet",
637 kletter, option);
638
639 /*
640 * Separator
641 */
642 if (parentKindex == KIND_GHOST_INDEX)
643 {
644 if (p[2] != ':')
645 error (FATAL,
646 "wrong separator specification in \"--%s\" option: cannot find a colon after child kind: %s",
647 option, parameter);
648 separator = p + 3;
649 }
650 else
651 {
652 if (p[3] != ':')
653 error (FATAL,
654 "wrong separator specification in \"--%s\" option: cannot find a colon after child kind: %s",
655 option, parameter);
656 separator = p + 4;
657 }
658
659 Assert (parentKindex != KIND_FILE_INDEX);
660 Assert (kindex != KIND_FILE_INDEX);
661 defineScopeSeparator (parser->kindControlBlock, kindex, parentKindex, separator);
662 return true;
663}
664
665extern bool processScopesepOption (const char *const option, const char * const parameter)
666{
667 langType language;
668
669 language = getLanguageComponentInOption (option, "_scopesep-");
670 if (language == LANG_IGNORE)
671 return false;
672
673 return processLangDefineScopesep (language, option, parameter);
674}
675
677{
678 parserCandidate* candidates;
679 unsigned int i;
680
682 for (i = 0; i < LanguageCount; i++)
683 {
684 candidates[i].lang = LANG_IGNORE;
685 candidates[i].spec = NULL;
686 candidates[i].specType = SPEC_NONE;
687 }
688 return candidates;
689}
690
691/* Collects every enabled parser whose name or alias matches KEY into *CANDIDATES and returns how many were found. */
692static unsigned int nominateLanguageCandidates (const char *const key, parserCandidate** candidates)
693{
694 unsigned int count;
695 langType i;
696 const char* spec = NULL;
698
699 *candidates = parserCandidateNew(LanguageCount);
700
701 for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; )
702 {
703 i = getNameOrAliasesLanguageAndSpec (key, i, &spec, &specType);
704 if (i != LANG_IGNORE)
705 {
706 (*candidates)[count].lang = i++;
707 (*candidates)[count].spec = spec;
708 (*candidates)[count++].specType = specType;
709 }
710 }
711
712 return count;
713}
714
715static unsigned int
716nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate** candidates)
717{
718 unsigned int count;
719 langType i;
720 const char* spec;
722
723 *candidates = parserCandidateNew(LanguageCount);
724
725 for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; )
726 {
727 i = getPatternLanguageAndSpec (baseName, i, &spec, &specType);
728 if (i != LANG_IGNORE)
729 {
730 (*candidates)[count].lang = i++;
731 (*candidates)[count].spec = spec;
732 (*candidates)[count++].specType = specType;
733 }
734 }
735 return count;
736}
737
739
740/* The name of the language interpreter, either directly or as the argument
741 * to "env".
742 */
743static vString* determineInterpreter (const char* const cmd)
744{
745 vString* const interpreter = vStringNew ();
746 const char* p = cmd;
747 do
748 {
749 vStringClear (interpreter);
750 for ( ; isspace ((int) *p) ; ++p)
751 ; /* no-op */
752 for ( ; *p != '\0' && ! isspace ((int) *p) ; ++p)
753 vStringPut (interpreter, (int) *p);
754 } while (strcmp (vStringValue (interpreter), "env") == 0);
755 return interpreter;
756}
757
759{
760 vString* const vLine = vStringNew ();
761 const char* const line = readLineRaw (vLine, input);
762 vString* interpreter = NULL;
763
764 if (line != NULL && line [0] == '#' && line [1] == '!')
765 {
766 /* "48.2.4.1 Specifying File Variables" of Emacs info:
767 ---------------------------------------------------
768 In shell scripts, the first line is used to
769 identify the script interpreter, so you
770 cannot put any local variables there. To
771 accommodate this, Emacs looks for local
772 variable specifications in the _second_
773 line if the first line specifies an
774 interpreter. */
775
776 interpreter = extractEmacsModeAtFirstLine(input);
777 if (!interpreter)
778 {
779 const char* const lastSlash = strrchr (line, '/');
780 const char *const cmd = lastSlash != NULL ? lastSlash+1 : line+2;
781 interpreter = determineInterpreter (cmd);
782 }
783 }
784 vStringDelete (vLine);
785 return interpreter;
786}
787
788static vString* determineEmacsModeAtFirstLine (const char* const line)
789{
790 vString* mode = vStringNew ();
791
792 const char* p = strstr(line, "-*-");
793 if (p == NULL)
794 goto out;
795 p += strlen("-*-");
796
797 for ( ; isspace ((int) *p) ; ++p)
798 ; /* no-op */
799
800 if (strncasecmp(p, "mode:", strlen("mode:")) == 0)
801 {
802 /* -*- mode: MODE; -*- */
803 p += strlen("mode:");
804 for ( ; isspace ((int) *p) ; ++p)
805 ; /* no-op */
806 for ( ; *p != '\0' && isLanguageNameChar ((int) *p) ; ++p)
807 vStringPut (mode, (int) *p);
808 }
809 else
810 {
811 /* -*- MODE -*- */
812 const char* end = strstr (p, "-*-");
813
814 if (end == NULL)
815 goto out;
816
817 for ( ; p < end && isLanguageNameChar ((int) *p) ; ++p)
818 vStringPut (mode, (int) *p);
819
820 for ( ; isspace ((int) *p) ; ++p)
821 ; /* no-op */
822 if (strncmp(p, "-*-", strlen("-*-")) != 0)
823 vStringClear (mode);
824 }
825
826 vStringLower (mode);
827
828out:
829 return mode;
830
831}
832
834{
835 vString* const vLine = vStringNew ();
836 const char* const line = readLineRaw (vLine, input);
837 vString* mode = NULL;
838 if (line != NULL)
840 vStringDelete (vLine);
841
842 if (mode && (vStringLength(mode) == 0))
843 {
844 vStringDelete(mode);
845 mode = NULL;
846 }
847 return mode;
848}
849
851{
852 vString* const vLine = vStringNew ();
853 const char* line;
854 bool headerFound = false;
855 const char* p;
856 vString* mode = vStringNew ();
857
858 while ((line = readLineRaw (vLine, fp)) != NULL)
859 {
860 if (headerFound && ((p = strstr (line, "mode:")) != NULL))
861 {
862 vStringClear (mode);
863 headerFound = false;
864
865 p += strlen ("mode:");
866 for ( ; isspace ((int) *p) ; ++p)
867 ; /* no-op */
868 for ( ; *p != '\0' && isLanguageNameChar ((int) *p) ; ++p)
869 vStringPut (mode, (int) *p);
870 }
871 else if (headerFound && (p = strstr(line, "End:")))
872 headerFound = false;
873 else if (strstr (line, "Local Variables:"))
874 headerFound = true;
875 }
876 vStringDelete (vLine);
877 return mode;
878}
879
881{
882 vString* mode;
883
884 /* "48.2.4.1 Specifying File Variables" of Emacs info:
885 ---------------------------------------------------
886 you can define file local variables using a "local
887 variables list" near the end of the file. The start of the
888 local variables list should be no more than 3000 characters
889 from the end of the file, */
890 mio_seek(input, -3000, SEEK_END);
891
892 mode = determineEmacsModeAtEOF (input);
893 if (mode && (vStringLength (mode) == 0))
894 {
895 vStringDelete (mode);
896 mode = NULL;
897 }
898
899 return mode;
900}
901
902static vString* determineVimFileType (const char *const modeline)
903{
904 /* considerable combinations:
905 --------------------------
906 ... filetype=
907 ... ft= */
908
909 unsigned int i;
910 const char* p;
911
912 const char* const filetype_prefix[] = {"filetype=", "ft="};
913 vString* const filetype = vStringNew ();
914
915 for (i = 0; i < ARRAY_SIZE(filetype_prefix); i++)
916 {
917 if ((p = strrstr(modeline, filetype_prefix[i])) == NULL)
918 continue;
919
920 p += strlen(filetype_prefix[i]);
921 for ( ; *p != '\0' && isalnum ((int) *p) ; ++p)
922 vStringPut (filetype, (int) *p);
923 break;
924 }
925 return filetype;
926}
927
929{
930 /* http://vimdoc.sourceforge.net/htmldoc/options.html#modeline
931
932 [text]{white}{vi:|vim:|ex:}[white]se[t] {options}:[text]
933 options=> filetype=TYPE or ft=TYPE
934
935 'modelines' 'mls' number (default 5)
936 global
937 {not in Vi}
938 If 'modeline' is on 'modelines' gives the number of lines that is
939 checked for set commands. */
940
941 vString* filetype = NULL;
942#define RING_SIZE 5
943 vString* ring[RING_SIZE];
944 int i, j;
945 unsigned int k;
946 const char* const prefix[] = {
947 "vim:", "vi:", "ex:"
948 };
949
950 for (i = 0; i < RING_SIZE; i++)
951 ring[i] = vStringNew ();
952
953 i = 0;
954 while ((readLineRaw (ring[i++], input)) != NULL)
955 if (i == RING_SIZE)
956 i = 0;
957
958 j = i;
959 do
960 {
961 const char* p;
962
963 j--;
964 if (j < 0)
965 j = RING_SIZE - 1;
966
967 for (k = 0; k < ARRAY_SIZE(prefix); k++)
968 if ((p = strstr (vStringValue (ring[j]), prefix[k])) != NULL)
969 {
970 p += strlen(prefix[k]);
971 for ( ; isspace ((int) *p) ; ++p)
972 ; /* no-op */
973 filetype = determineVimFileType(p);
974 break;
975 }
976 } while (((i == RING_SIZE)? (j != RING_SIZE - 1): (j != i)) && (!filetype));
977
978 for (i = RING_SIZE - 1; i >= 0; i--)
979 vStringDelete (ring[i]);
980#undef RING_SIZE
981
982 if (filetype && (vStringLength (filetype) == 0))
983 {
984 vStringDelete (filetype);
985 filetype = NULL;
986 }
987 return filetype;
988
989 /* TODO:
990 [text]{white}{vi:|vim:|ex:}[white]{options} */
991}
992
994 vString * (* determiner)(const char *const, void *),
995 void *data)
996{
997 vString* const vLine = vStringNew ();
998 const char* const line = readLineRaw (vLine, input);
999 vString* mode = NULL;
1000
1001 if (line)
1002 mode = determiner (line, data);
1003
1004 vStringDelete (vLine);
1005 return mode;
1006}
1007
1008static vString* determineZshAutoloadTag (const char *const modeline,
1009 void *data CTAGS_ATTR_UNUSED)
1010{
1011 /* See "Autoloaded files" in zsh info.
1012 -------------------------------------
1013 #compdef ...
1014 #autoload [ OPTIONS ] */
1015
1016 if (((strncmp (modeline, "#compdef", 8) == 0) && isspace (*(modeline + 8)))
1017 || ((strncmp (modeline, "#autoload", 9) == 0)
1018 && (isspace (*(modeline + 9)) || *(modeline + 9) == '\0')))
1019 return vStringNewInit ("zsh");
1020 else
1021 return NULL;
1022}
1023
1025{
1027}
1028
1029static vString* determinePHPMark(const char *const modeline,
1030 void *data CTAGS_ATTR_UNUSED)
1031{
1032 if (strncmp (modeline, "<?php", 5) == 0)
1033 return vStringNewInit ("php");
1034 else
1035 return NULL;
1036}
1037
1039{
1040 return extractMarkGeneric (input, determinePHPMark, NULL);
1041}
1042
1043
1045 const char *fileName;
1047 bool err;
1048};
1049
/*
 * Opens (_glc_)->fileName with getMio() into (_glc_)->input unless a
 * stream is already attached.  On failure, sets (_glc_)->err and jumps to
 * _label_.
 *
 * The last line deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows into the macro body.
 */
#define GLC_FOPEN_IF_NECESSARY0(_glc_, _label_) do {			\
	if (!(_glc_)->input) {						\
		(_glc_)->input = getMio((_glc_)->fileName, "rb", false);	\
		if (!(_glc_)->input) {					\
			(_glc_)->err = true;				\
			goto _label_;					\
		}							\
	}								\
} while (0)
1059
/*
 * Like GLC_FOPEN_IF_NECESSARY0, with one extra step: when
 * _doesParserRequireMemoryStream_ is true and mio_memory_get_data()
 * returns NULL for the attached stream, the stream is replaced by the
 * result of mio_new_mio() over it and the previous stream is unreferenced.
 * If that replacement is NULL, (_glc_)->err is set and control jumps to
 * _label_.
 */
#define GLC_FOPEN_IF_NECESSARY(_glc_, _label_, _doesParserRequireMemoryStream_) \
	do {								\
		if (!(_glc_)->input)					\
			GLC_FOPEN_IF_NECESSARY0 (_glc_, _label_);	\
		if ((_doesParserRequireMemoryStream_) &&		\
		    (mio_memory_get_data((_glc_)->input, NULL) == NULL)) \
		{							\
			MIO *previous_ = (_glc_)->input;		\
			(_glc_)->input = mio_new_mio (previous_, 0, -1); \
			mio_unref (previous_);				\
			if (!(_glc_)->input) {				\
				(_glc_)->err = true;			\
				goto _label_;				\
			}						\
		}							\
	} while (0)
1076
/* Drops the reference to (_glc_)->input with mio_unref() and resets the
 * field to NULL; does nothing when no stream is attached. */
#define GLC_FCLOSE(_glc_)				\
	do {						\
		if ((_glc_)->input) {			\
			mio_unref((_glc_)->input);	\
			(_glc_)->input = NULL;		\
		}					\
	} while (0)
1083
1084static const struct taster {
1085 vString* (* taste) (MIO *);
1086 const char *msg;
1087} eager_tasters[] = {
1088 {
1090 .msg = "interpreter",
1091 },
1092 {
1093 .taste = extractZshAutoloadTag,
1094 .msg = "zsh autoload tag",
1095 },
1096 {
1098 .msg = "emacs mode at the first line",
1099 },
1100 {
1102 .msg = "emacs mode at the EOF",
1103 },
1104 {
1105 .taste = extractVimFileType,
1106 .msg = "vim modeline",
1107 },
1108 {
1109 .taste = extractPHPMark,
1110 .msg = "PHP marker",
1111 }
1113static langType tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters,
1115
1116/* Returns true if candidate_selector appears in the NULL-terminated
1117 * selector list of the given language, and false otherwise (including
1118 * when the language has no selector list). */
1119static bool
1121{
1122 selectLanguage *selector;
1123
1124 selector = LanguageTable[ lang ].def->selectLanguage;
1125 if (selector == NULL)
1126 return false;
1127
1128 while (*selector)
1129 {
1130 if (*selector == candidate_selector)
1131 return true;
1132 selector++;
1133 }
1134 return false;
1135}
1136
1137static selectLanguage
1138commonSelector (const parserCandidate *candidates, int n_candidates)
1139{
1140 Assert (n_candidates > 1);
1141 selectLanguage *selector;
1142 int i;
1143
1144 selector = LanguageTable[ candidates[0].lang ].def->selectLanguage;
1145 if (selector == NULL)
1146 return NULL;
1147
1148 while (*selector)
1149 {
1150 for (i = 1; i < n_candidates; ++i)
1151 if (! hasTheSameSelector (candidates[i].lang, *selector))
1152 break;
1153 if (i == n_candidates)
1154 return *selector;
1155 selector++;
1156 }
1157 return NULL;
1158}
1159
1160
1161/* Calls the selector and returns the integer value of the parser for the
1162 * language associated with the string returned by the selector.
1163 */
1164static int
1166 parserCandidate *candidates,
1167 unsigned int nCandidates)
1168{
1169 const char *lang;
1170 langType *cs = xMalloc(nCandidates, langType);
1171 unsigned int i;
1172
1173 for (i = 0; i < nCandidates; i++)
1174 cs[i] = candidates[i].lang;
1175 lang = selector(input, cs, nCandidates);
1176 eFree (cs);
1177
1178 if (lang)
1179 {
1180 verbose (" selection: %s\n", lang);
1181 return getNamedLanguage(lang, 0);
1182 }
1183 else
1184 {
1185 verbose (" no selection\n");
1186 return LANG_IGNORE;
1187 }
1188}
1189
1190static int compareParsersByName (const void *a, const void* b)
1191{
1192 const parserDefinition *const *la = a, *const *lb = b;
1193 return strcasecmp ((*la)->name, (*lb)->name);
1194}
1195
1196static int sortParserCandidatesBySpecType (const void *a, const void *b)
1197{
1198 const parserCandidate *ap = a, *bp = b;
1199 if (ap->specType > bp->specType)
1200 return -1;
1201 else if (ap->specType == bp->specType)
1202 {
1203 /* qsort, the function calling this function,
1204 doesn't do "stable sort". To make the result of
1205 sorting predictable, compare the names of parsers
1206 when their specType is the same. */
1208 parserDefinition *lb = LanguageTable [bp->lang].def;
1209 return compareParsersByName (&la, &lb);
1210 }
1211 else
1212 return 1;
1213}
1214
1215static unsigned int sortAndFilterParserCandidates (parserCandidate *candidates,
1216 unsigned int n_candidates)
1217{
1218 enum specType highestSpecType;
1219 unsigned int i;
1220 unsigned int r;
1221
1222 if (n_candidates < 2)
1223 return n_candidates;
1224
1225 qsort (candidates, n_candidates, sizeof(*candidates),
1227
1228 highestSpecType = candidates [0].specType;
1229 r = 1;
1230 for (i = 1; i < n_candidates; i++)
1231 {
1232 if (candidates[i].specType == highestSpecType)
1233 r++;
1234 }
1235 return r;
1236}
1237
1238static void verboseReportCandidate (const char *header,
1239 parserCandidate *candidates,
1240 unsigned int n_candidates)
1241{
1242 unsigned int i;
1243 verbose (" #%s: %u\n", header, n_candidates);
1244 for (i = 0; i < n_candidates; i++)
1245 verbose (" %u: %s (%s: \"%s\")\n",
1246 i,
1247 LanguageTable[candidates[i].lang].def->name,
1248 specTypeName [candidates[i].specType],
1249 candidates[i].spec);
1250}
1251
1253 int n_candidates)
1254{
1255 int i;
1256
1257 for (i = 0; i < n_candidates; i++)
1258 if (doesParserRequireMemoryStream (candidates[i].lang))
1259 return true;
1260
1261 return false;
1262}
1263
1264static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx *glc,
1265 unsigned int nominate (const char *const, parserCandidate**),
1267{
1268 langType language;
1269 parserCandidate *candidates;
1270 unsigned int n_candidates;
1271
1272 if (fallback)
1274
1275 n_candidates = (*nominate)(spec, &candidates);
1276 verboseReportCandidate ("candidates",
1277 candidates, n_candidates);
1278
1279 n_candidates = sortAndFilterParserCandidates (candidates, n_candidates);
1280 verboseReportCandidate ("candidates after sorting and filtering",
1281 candidates, n_candidates);
1282
1283 if (n_candidates == 1)
1284 {
1285 language = candidates[0].lang;
1286 }
1287 else if (n_candidates > 1)
1288 {
1289 selectLanguage selector = commonSelector(candidates, n_candidates);
1290 bool memStreamRequired = doesCandidatesRequireMemoryStream (candidates,
1291 n_candidates);
1292
1293 GLC_FOPEN_IF_NECESSARY(glc, fopen_error, memStreamRequired);
1294 if (selector) {
1295 verbose (" selector: %p\n", selector);
1296 language = pickLanguageBySelection(selector, glc->input, candidates, n_candidates);
1297 } else {
1298 verbose (" selector: NONE\n");
1299 fopen_error:
1300 language = LANG_IGNORE;
1301 }
1302
1303 Assert(language != LANG_AUTO);
1304
1305 if (fallback)
1306 *fallback = candidates[0].lang;
1307 }
1308 else
1309 {
1310 language = LANG_IGNORE;
1311 }
1312
1313 eFree(candidates);
1314 candidates = NULL;
1315
1316 return language;
1317}
1318
1319static langType getSpecLanguage (const char *const spec,
1320 struct getLangCtx *glc,
1322{
1324 fallback);
1325}
1326
1327static langType getPatternLanguage (const char *const baseName,
1328 struct getLangCtx *glc,
1330{
1331 return getSpecLanguageCommon(baseName, glc,
1333 fallback);
1334}
1335
1336/* This function tries to figure out language contained in a file by
1337 * running a series of tests, trying to find some clues in the file.
1338 */
1339static langType
1340tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters,
1342{
1343 int i;
1344
1345 if (fallback)
1347 for (i = 0; i < n_tasters; ++i) {
1348 langType language;
1349 vString* spec;
1350
1351 mio_rewind(glc->input);
1352 spec = tasters[i].taste(glc->input);
1353
1354 if (NULL != spec) {
1355 verbose (" %s: %s\n", tasters[i].msg, vStringValue (spec));
1356 language = getSpecLanguage (vStringValue (spec), glc,
1358 vStringDelete (spec);
1359 if (language != LANG_IGNORE)
1360 return language;
1361 }
1362 }
1363
1364 return LANG_IGNORE;
1365}
1366
1367
1370 const char *const fileName;
1372};
1373
1374static langType
1376{
1377 const char *const fileName = req->fileName;
1378 langType language;
1379
1380 /* ctags tries a variety of ways (HINTS) to choose a proper language
1381 for the given fileName. If one of the hints chooses multiple
1382 candidates, a SELECTOR common to the candidate languages
1383 is called.
1384
1385 "selection failure" means a selector common to the
1386 candidates doesn't exist or the common selector returns NULL.
1387
1388 "hint failure" means the hint finds no candidate or
1389 "selection failure" occurs though the hint finds multiple
1390 candidates.
1391
1392 If a hint chooses multiple candidates and a selection failure
1393 occurs, the hint records one of the candidates as the FALLBACK for
1394 the hint. (The candidates are stored in an array. The first
1395 element of the array is recorded. However, there is no
1396 specification about the order of elements in the array.)
1397
1398 If all hints fail, the FALLBACKs of the hints are examined.
1399 Which fallback should be chosen? `enum hint' defines the order. */
1400 enum hint {
1401 HINT_INTERP,
1402 HINT_OTHER,
1403 HINT_FILENAME,
1404 HINT_TEMPLATE,
1405 N_HINTS,
1406 };
1407 langType fallback[N_HINTS];
1408 int i;
1409 struct getLangCtx glc = {
1410 .fileName = fileName,
1411 .input = (req->type == GLR_REUSE)? mio_ref (req->mio): NULL,
1412 .err = false,
1413 };
1414 const char* const baseName = baseFilename (fileName);
1415 char *templateBaseName = NULL;
1416 fileStatus *fstatus = NULL;
1417
1418 for (i = 0; i < N_HINTS; i++)
1419 fallback [i] = LANG_IGNORE;
1420
1421 verbose ("Get file language for %s\n", fileName);
1422
1423 verbose (" pattern: %s\n", baseName);
1424 language = getPatternLanguage (baseName, &glc,
1425 fallback + HINT_FILENAME);
1426 if (language != LANG_IGNORE || glc.err)
1427 goto cleanup;
1428
1429 {
1430 const char* const tExt = ".in";
1431 templateBaseName = baseFilenameSansExtensionNew (fileName, tExt);
1432 if (templateBaseName)
1433 {
1434 verbose (" pattern + template(%s): %s\n", tExt, templateBaseName);
1435 GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false);
1436 mio_rewind(glc.input);
1437 language = getPatternLanguage(templateBaseName, &glc,
1438 fallback + HINT_TEMPLATE);
1439 if (language != LANG_IGNORE)
1440 goto cleanup;
1441 }
1442 }
1443
1444 /* If the input is already opened, we don't have to verify the existence. */
1445 if (glc.input || ((fstatus = eStat (fileName)) && fstatus->exists))
1446 {
1447 if ((fstatus && fstatus->isExecutable) || Option.guessLanguageEagerly)
1448 {
1449 GLC_FOPEN_IF_NECESSARY (&glc, cleanup, false);
1450 language = tasteLanguage(&glc, eager_tasters, 1,
1451 fallback + HINT_INTERP);
1452 }
1453 if (language != LANG_IGNORE)
1454 goto cleanup;
1455
1457 {
1458 GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false);
1459 language = tasteLanguage(&glc,
1460 eager_tasters + 1,
1462 fallback + HINT_OTHER);
1463 }
1464 }
1465
1466
1467 cleanup:
1468 if (req->type == GLR_OPEN && glc.input)
1469 req->mio = mio_ref (glc.input);
1470 GLC_FCLOSE(&glc);
1471 if (fstatus)
1472 eStatFree (fstatus);
1473 if (templateBaseName)
1474 eFree (templateBaseName);
1475
1476 for (i = 0;
1477 language == LANG_IGNORE && i < N_HINTS;
1478 i++)
1479 {
1480 language = fallback [i];
1481 if (language != LANG_IGNORE)
1482 verbose (" fallback[hint = %d]: %s\n", i, getLanguageName (language));
1483 }
1484
1485 /* We cannot use isLanguageEnabled() here. */
1486 if (language == LANG_IGNORE
1487 && LanguageTable[LANG_FALLBACK].def->enabled)
1488 {
1489 language = LANG_FALLBACK;
1490 verbose (" last resort: using \"%s\" parser\n",
1492 }
1493 return language;
1494}
1495
1497{
1499
1500 if (l == LANG_AUTO)
1502 else if (! isLanguageEnabled (l))
1503 {
1504 error (FATAL,
1505 "%s parser specified with --language-force is disabled",
1506 getLanguageName (l));
1507 /* For suppressing warnings. */
1508 return LANG_AUTO;
1509 }
1510 else
1511 return Option.language;
1512}
1513
1515{
1516 struct GetLanguageRequest req = {
1517 .type = GLR_DISCARD,
1518 .fileName = fileName,
1519 };
1520
1521 return getFileLanguageForRequest (&req);
1522}
1523
1524typedef void (*languageCallback) (langType language, void* user_data);
1525static void foreachLanguage(languageCallback callback, void *user_data)
1526{
1527 langType result = LANG_IGNORE;
1528
1529 unsigned int i;
1530 for (i = 0 ; i < LanguageCount && result == LANG_IGNORE ; ++i)
1531 {
1532 const parserDefinition* const lang = LanguageTable [i].def;
1533 if (lang->name != NULL)
1534 callback(i, user_data);
1535 }
1536}
1537
1538static void printLanguageMap (const langType language, FILE *fp)
1539{
1540 bool first = true;
1541 unsigned int i;
1542 parserObject *parser = LanguageTable + language;
1543 stringList* map = parser->currentPatterns;
1544 Assert (0 <= language && language < (int) LanguageCount);
1545 for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i)
1546 {
1547 fprintf (fp, "%s(%s)", (first ? "" : " "),
1548 vStringValue (stringListItem (map, i)));
1549 first = false;
1550 }
1551 map = parser->currentExtensions;
1552 for (i = 0 ; map != NULL && i < stringListCount (map) ; ++i)
1553 {
1554 fprintf (fp, "%s.%s", (first ? "" : " "),
1555 vStringValue (stringListItem (map, i)));
1556 first = false;
1557 }
1558}
1559
1560extern void installLanguageMapDefault (const langType language)
1561{
1562 parserObject* parser;
1563 Assert (0 <= language && language < (int) LanguageCount);
1564 parser = LanguageTable + language;
1565 if (parser->currentPatterns != NULL)
1567 if (parser->currentExtensions != NULL)
1569
1570 if (parser->def->patterns == NULL)
1571 parser->currentPatterns = stringListNew ();
1572 else
1573 {
1574 parser->currentPatterns =
1576 }
1577 if (parser->def->extensions == NULL)
1578 parser->currentExtensions = stringListNew ();
1579 else
1580 {
1581 parser->currentExtensions =
1583 }
1584 BEGIN_VERBOSE(vfp);
1585 {
1586 printLanguageMap (language, vfp);
1587 putc ('\n', vfp);
1588 }
1589 END_VERBOSE();
1590}
1591
1593{
1594 unsigned int i;
1595 for (i = 0 ; i < LanguageCount ; ++i)
1596 {
1597 verbose (" %s: ", getLanguageName (i));
1599 }
1600}
1601
1602extern void installLanguageAliasesDefault (const langType language)
1603{
1604 parserObject* parser;
1605 Assert (0 <= language && language < (int) LanguageCount);
1606 parser = LanguageTable + language;
1607 if (parser->currentAliases != NULL)
1609
1610 if (parser->def->aliases == NULL)
1611 parser->currentAliases = stringListNew ();
1612 else
1613 {
1614 parser->currentAliases =
1616 }
1617 BEGIN_VERBOSE(vfp);
1618 if (parser->currentAliases != NULL)
1619 for (unsigned int i = 0 ; i < stringListCount (parser->currentAliases) ; ++i)
1620 fprintf (vfp, " %s", vStringValue (
1621 stringListItem (parser->currentAliases, i)));
1622 putc ('\n', vfp);
1623 END_VERBOSE();
1624}
1625
1627{
1628 unsigned int i;
1629 for (i = 0 ; i < LanguageCount ; ++i)
1630 {
1631 verbose (" %s: ", getLanguageName (i));
1633 }
1634}
1635
1636extern void clearLanguageMap (const langType language)
1637{
1638 Assert (0 <= language && language < (int) LanguageCount);
1639 stringListClear ((LanguageTable + language)->currentPatterns);
1640 stringListClear ((LanguageTable + language)->currentExtensions);
1641}
1642
1643extern void clearLanguageAliases (const langType language)
1644{
1645 Assert (0 <= language && language < (int) LanguageCount);
1646
1647 parserObject* parser = (LanguageTable + language);
1648 if (parser->currentAliases)
1650}
1651
1652static bool removeLanguagePatternMap1(const langType language, const char *const pattern)
1653{
1654 bool result = false;
1655 stringList* const ptrn = (LanguageTable + language)->currentPatterns;
1656
1657 if (ptrn != NULL && stringListDeleteItemExtension (ptrn, pattern))
1658 {
1659 verbose (" (removed from %s)", getLanguageName (language));
1660 result = true;
1661 }
1662 return result;
1663}
1664
1665extern bool removeLanguagePatternMap (const langType language, const char *const pattern)
1666{
1667 bool result = false;
1668
1669 if (language == LANG_AUTO)
1670 {
1671 unsigned int i;
1672 for (i = 0 ; i < LanguageCount && ! result ; ++i)
1673 result = removeLanguagePatternMap1 (i, pattern) || result;
1674 }
1675 else
1676 result = removeLanguagePatternMap1 (language, pattern);
1677 return result;
1678}
1679
1680extern void addLanguagePatternMap (const langType language, const char* ptrn,
1681 bool exclusiveInAllLanguages)
1682{
1683 vString* const str = vStringNewInit (ptrn);
1684 parserObject* parser;
1685 Assert (0 <= language && language < (int) LanguageCount);
1686 parser = LanguageTable + language;
1687 if (exclusiveInAllLanguages)
1689 stringListAdd (parser->currentPatterns, str);
1690}
1691
1692static bool removeLanguageExtensionMap1 (const langType language, const char *const extension)
1693{
1694 bool result = false;
1695 stringList* const exts = (LanguageTable + language)->currentExtensions;
1696
1697 if (exts != NULL && stringListDeleteItemExtension (exts, extension))
1698 {
1699 verbose (" (removed from %s)", getLanguageName (language));
1700 result = true;
1701 }
1702 return result;
1703}
1704
1705extern bool removeLanguageExtensionMap (const langType language, const char *const extension)
1706{
1707 bool result = false;
1708
1709 if (language == LANG_AUTO)
1710 {
1711 unsigned int i;
1712 for (i = 0 ; i < LanguageCount ; ++i)
1713 result = removeLanguageExtensionMap1 (i, extension) || result;
1714 }
1715 else
1716 result = removeLanguageExtensionMap1 (language, extension);
1717 return result;
1718}
1719
1721 const langType language, const char* extension,
1722 bool exclusiveInAllLanguages)
1723{
1724 vString* const str = vStringNewInit (extension);
1725 Assert (0 <= language && language < (int) LanguageCount);
1726 if (exclusiveInAllLanguages)
1728 stringListAdd ((LanguageTable + language)->currentExtensions, str);
1729}
1730
1731extern void addLanguageAlias (const langType language, const char* alias)
1732{
1733 vString* const str = vStringNewInit (alias);
1734 parserObject* parser;
1735 Assert (0 <= language && language < (int) LanguageCount);
1736 parser = LanguageTable + language;
1737 if (parser->currentAliases == NULL)
1738 parser->currentAliases = stringListNew ();
1739 stringListAdd (parser->currentAliases, str);
1740}
1741
1742extern void enableLanguage (const langType language, const bool state)
1743{
1744 Assert (0 <= language && language < (int) LanguageCount);
1745 LanguageTable [language].def->enabled = state;
1746}
1747
#ifdef DO_TRACING
/* Mark the parser definition of LANGUAGE as traced. */
extern void traceLanguage (langType language)
{
	parserDefinition *def;

	Assert (0 <= language && language < (int) LanguageCount);

	def = (LanguageTable + language)->def;
	def->traced = true;
}

/* Return the `traced' flag of the parser definition of LANGUAGE. */
extern bool isLanguageTraced (langType language)
{
	const parserDefinition *def;

	Assert (0 <= language && language < (int) LanguageCount);

	def = (LanguageTable + language)->def;
	return def->traced;
}
#endif /* DO_TRACING */
1760
1761extern void enableLanguages (const bool state)
1762{
1763 unsigned int i;
1764 for (i = 0 ; i < LanguageCount ; ++i)
1765 enableLanguage (i, state);
1766}
1767
1768static void installFieldDefinition (const langType language)
1769{
1770 unsigned int i;
1771 parserDefinition * parser;
1772
1773 Assert (0 <= language && language < (int) LanguageCount);
1774 parser = LanguageTable [language].def;
1775
1776 if (parser->fieldTable != NULL)
1777 {
1778 for (i = 0; i < parser->fieldCount; i++)
1779 defineField (& parser->fieldTable [i], language);
1780 }
1781}
1782
1783static void installXtagDefinition (const langType language)
1784{
1785 unsigned int i;
1786 parserDefinition * parser;
1787
1788 Assert (0 <= language && language < (int) LanguageCount);
1789 parser = LanguageTable [language].def;
1790
1791 if (parser->xtagTable != NULL)
1792 {
1793 for (i = 0; i < parser->xtagCount; i++)
1794 defineXtag (& parser->xtagTable [i], language);
1795 }
1796}
1797
1799{
1800 parserObject *const parser = LanguageTable + lang;
1801
1802 if (parser->initialized)
1803 goto out;
1804
1805 verbose ("Initialize parser: %s\n", parser->def->name);
1806 parser->initialized = true;
1807
1808 installKeywordTable (lang);
1809 installTagXpathTable (lang);
1811 installXtagDefinition (lang);
1812
1813 /* regex definitions refers xtag definitions.
1814 So installing RegexTable must be after installing
1815 xtag definitions. */
1816 installTagRegexTable (lang);
1817
1818 if (parser->def->initialize != NULL)
1819 parser->def->initialize (lang);
1820
1821 initializeDependencies (parser->def, parser->slaveControlBlock);
1822
1823 Assert (parser->fileKind != NULL);
1824 Assert (!doesParserUseKind (parser->kindControlBlock, parser->fileKind->letter));
1825
1826 return;
1827
1828 out:
1829 /* lazyInitialize() installs findRegexTags() to parser->parser.
1830 findRegexTags() should be installed to a parser if the parser is
1831 optlib based(created by --langdef) and has some regex patterns(defined
1832 with --regex-<LANG>). findRegexTags() makes regex matching work.
1833
1834 A parser can be initialized while options are being evaluated,
1835 by --fields-<LANG>=+{something}, for example.
1836 If such an option is evaluated first, evaluating a --regex-<LANG>=...
1837 option doesn't cause findRegexTags to be installed. As a result,
1838 regex matching doesn't work. lazyInitialize was called only
1839 once, when --fields-<LANG>=+{something} was evaluated. At that
1840 time ctags had not yet seen --regex-<LANG>=.... Even though
1841 ctags saw --regex-<LANG>=.... after initializing, there
1842 was no chance to install findRegexTags() to parser->parser.
1843
1844 The following code block gives extra chances to call lazyInitialize(),
1845 which installs findRegexTags() to parser->parser. */
1846 if (parser->def->initialize == lazyInitialize)
1847 parser->def->initialize (lang);
1848}
1849
1850extern void initializeParser (langType lang)
1851{
1852 if (lang == LANG_AUTO)
1853 {
1854 unsigned int i;
1855 for (i = 0; i < countParsers(); i++)
1857 }
1858 else
1859 initializeParserOne (lang);
1860}
1861
1863{
1864 unsigned int i;
1866 langType upper;
1867 parserObject *upperParser;
1868
1869 for (i = 0; i < parser->dependencyCount; i++)
1870 {
1871 d = parser->dependencies + i;
1872 upper = getNamedLanguage (d->upperParser, 0);
1873 upperParser = LanguageTable + upper;
1874
1875 linkDependencyAtInitializeParsing (d->type, upperParser->def,
1876 upperParser->slaveControlBlock,
1877 upperParser->kindControlBlock,
1878 parser,
1879 (LanguageTable + parser->id)->kindControlBlock,
1880 d->data);
1881 }
1882}
1883
1884/* Used in both builtin and optlib parsers. */
1885static void initializeParsingCommon (parserDefinition *def, bool is_builtin)
1886{
1887 parserObject *parser;
1888
1889 if (is_builtin)
1890 verbose ("%s%s", LanguageCount > 0 ? ", " : "", def->name);
1891 else
1892 verbose ("Add optlib parser: %s\n", def->name);
1893
1894 def->id = LanguageCount++;
1895 parser = LanguageTable + def->id;
1896 parser->def = def;
1897
1899
1900 parser->fileKind = &defaultFileKind;
1901
1905}
1906
1907extern void initializeParsing (void)
1908{
1909 unsigned int builtInCount;
1910 unsigned int i;
1911
1912 builtInCount = ARRAY_SIZE (BuiltInParsers);
1913 LanguageTable = xMalloc (builtInCount, parserObject);
1914 memset(LanguageTable, 0, builtInCount * sizeof (parserObject));
1915 for (i = 0; i < builtInCount; ++i)
1916 {
1919 }
1920
1924 NULL,
1925 NULL);
1927
1928 verbose ("Installing parsers: ");
1929 for (i = 0 ; i < builtInCount ; ++i)
1930 {
1931 parserDefinition* const def = (*BuiltInParsers [i]) ();
1932 if (def != NULL)
1933 {
1934 Assert (def->name);
1935 Assert (def->name[0] != '\0');
1936 Assert (strcmp (def->name, RSV_LANG_ALL));
1937 Assert (strpbrk (def->name, "!\"$%&'()*,-./:;<=>?@[\\]^`|~") == NULL);
1938
1939 if (def->method & METHOD_NOT_CRAFTED)
1940 def->parser = findRegexTags;
1941 else
1942 /* parser definition must define one and only one parsing routine */
1943 Assert ((!!def->parser) + (!!def->parser2) == 1);
1944
1945 initializeParsingCommon (def, true);
1946 }
1947 }
1948 verbose ("\n");
1949
1950 for (i = 0; i < builtInCount ; ++i)
1952}
1953
1954extern void freeParserResources (void)
1955{
1956 unsigned int i;
1957 for (i = 0 ; i < LanguageCount ; ++i)
1958 {
1959 parserObject* const parser = LanguageTable + i;
1960
1961 if (parser->def->finalize)
1962 (parser->def->finalize)((langType)i, (bool)parser->initialized);
1963
1965
1968 parser->kindControlBlock = NULL;
1969
1970 finalizeDependencies (parser->def, parser->slaveControlBlock);
1972 parser->slaveControlBlock = NULL;
1973
1974 freeList (&parser->currentPatterns);
1975 freeList (&parser->currentExtensions);
1976 freeList (&parser->currentAliases);
1977
1978 eFree (parser->def->name);
1979 parser->def->name = NULL;
1980 eFree (parser->def);
1981 parser->def = NULL;
1982 }
1983 if (LanguageTable != NULL)
1986 LanguageCount = 0;
1987}
1988
/* Parser routine that intentionally performs no work. lazyInitialize()
 * stores it in a definition's `parser' member as the initial value. */
static void doNothing (void)
{
	/* Intentionally empty. */
}
1992
1993static void optlibRunBaseParser (void)
1994{
1996}
1997
1999{
2000 return (def->dependencies
2001 && (def->dependencies->type == DEPTYPE_SUBPARSER)
2002 && ((subparser *)def->dependencies->data)->direction & SUBPARSER_SUB_RUNS_BASE);
2003}
2004
2005static void lazyInitialize (langType language)
2006{
2007 parserDefinition* def;
2008
2009 Assert (0 <= language && language < (int) LanguageCount);
2010 def = LanguageTable [language].def;
2011
2012 def->parser = doNothing;
2013
2014 if (def->method & METHOD_REGEX)
2015 {
2018 else
2019 def->parser = findRegexTags;
2020 }
2021}
2022
2023extern void enableDefaultFileKind (bool state)
2024{
2025 defaultFileKind.enabled = state;
2026}
2027
2028/*
2029* Option parsing
2030*/
2032{
2033 char *base;
2036};
2037
2038static void pre_lang_def_flag_base_long (const char* const optflag, const char* const param, void* data)
2039{
2040 struct preLangDefFlagData * flag_data = data;
2041 langType base;
2042
2043 if (param[0] == '\0')
2044 {
2045 error (WARNING, "No base parser specified for \"%s\" flag of --langdef option", optflag);
2046 return;
2047 }
2048
2049 base = getNamedLanguage (param, 0);
2050 if (base == LANG_IGNORE)
2051 {
2052 error (WARNING, "Unknown language(%s) is specified for \"%s\" flag of --langdef option",
2053 param, optflag);
2054 return;
2055
2056 }
2057
2058 flag_data->base = eStrdup(param);
2059}
2060
2061#define LANGDEF_FLAG_DEDICATED "dedicated"
2062#define LANGDEF_FLAG_SHARED "shared"
2063#define LANGDEF_FLAG_BIDIR "bidirectional"
2064static void pre_lang_def_flag_direction_long (const char* const optflag, const char* const param CTAGS_ATTR_UNUSED, void* data)
2065{
2066 struct preLangDefFlagData * flag_data = data;
2067
2068 if (strcmp(optflag, LANGDEF_FLAG_DEDICATED) == 0)
2070 else if (strcmp(optflag, LANGDEF_FLAG_SHARED) == 0)
2072 else if (strcmp(optflag, LANGDEF_FLAG_BIDIR) == 0)
2073 flag_data->direction = SUBPARSER_BI_DIRECTION;
2074 else
2076}
2077
2078static void pre_lang_def_flag_autoFQTag_long (const char* const optflag,
2079 const char* const param CTAGS_ATTR_UNUSED,
2080 void* data)
2081{
2082 struct preLangDefFlagData * flag_data = data;
2083 flag_data->autoFQTag = true;
2084}
2085
2087 { '\0', "base", NULL, pre_lang_def_flag_base_long,
2088 "BASEPARSER", "utilize as a base parser"},
2091 NULL, "make the base parser dedicated to this subparser"},
2092 { '\0', LANGDEF_FLAG_SHARED, NULL,
2094 NULL, "share the base parser with the other subparsers"
2095 },
2096 { '\0', LANGDEF_FLAG_BIDIR, NULL,
2098 NULL, "utilize the base parser both 'dedicated' and 'shared' way"
2099 },
2100 { '\0', "_autoFQTag", NULL, pre_lang_def_flag_autoFQTag_long,
2101 NULL, "make full qualified tags automatically based on scope information"},
2102};
2103
2104static void optlibFreeDep (langType lang, bool initialized CTAGS_ATTR_UNUSED)
2105{
2106 parserDefinition * pdef = LanguageTable [lang].def;
2107
2108 if (pdef->dependencyCount == 1)
2109 {
2110 parserDependency *dep = pdef->dependencies;
2111
2112 eFree ((char *)dep->upperParser); /* Dirty cast */
2113 dep->upperParser = NULL;
2114 eFree (dep->data);
2115 dep->data = NULL;
2116 eFree (dep);
2117 pdef->dependencies = NULL;
2118 }
2119}
2120
2121static parserDefinition* OptlibParser(const char *name, const char *base,
2123{
2124 parserDefinition *def;
2125
2126 def = parserNew (name);
2129 if (base)
2130 {
2131 subparser *sub = xCalloc (1, subparser);
2133
2134 sub->direction = direction;
2135 dep->type = DEPTYPE_SUBPARSER;
2136 dep->upperParser = eStrdup (base);
2137 dep->data = sub;
2138 def->dependencies = dep;
2139 def->dependencyCount = 1;
2140 def->finalize = optlibFreeDep;
2141 }
2142
2143 return def;
2144}
2145
2147 const char *const option, const char *const parameter)
2148{
2149 char *name;
2150 char *flags;
2151 parserDefinition* def;
2152
2153 flags = strchr (parameter, LONG_FLAGS_OPEN);
2154 if (flags)
2155 name = eStrndup (parameter, flags - parameter);
2156 else
2157 name = eStrdup (parameter);
2158
2159 /* Verify that the name of the new language is acceptable. */
2160 char *unacceptable;
2161 if (name [0] == '\0')
2162 {
2163 eFree (name);
2164 error (FATAL, "No language specified for \"%s\" option", option);
2165 }
2166 else if (getNamedLanguage (name, 0) != LANG_IGNORE)
2167 {
2168 /* name cannot be freed because it is used in the FATAL message. */
2169 error (FATAL, "Language \"%s\" already defined", name);
2170 }
2171 else if (strcmp(name, RSV_LANG_ALL) == 0)
2172 {
2173 eFree (name);
2174 error (FATAL, "\"all\" is reserved; don't use it as the name for defining a new language");
2175 }
2176 else if ((unacceptable = strpbrk (name, "!\"$%&'()*,-./:;<=>?@[\\]^`|~")))
2177 {
2178 char c = *unacceptable;
2179
2180 /* name cannot be freed because it is used in the FATAL message. */
2181 /* We accept '_'.
2182 * We accept # and + because they are already used in C# parser and C++ parser.
2183 * {... is already trimmed at the beginning of this function. */
2184 if ((c == '`') || (c == '\''))
2185 error (FATAL, "don't use \"%c\" in a language name (%s)", c, name);
2186 else
2187 error (FATAL, "don't use `%c' in a language name (%s)", c, name);
2188 }
2189
2191 memset (LanguageTable + LanguageCount, 0, sizeof(parserObject));
2192
2193 struct preLangDefFlagData data = {
2194 .base = NULL,
2195 .direction = SUBPARSER_UNKNOWN_DIRECTION,
2196 .autoFQTag = false,
2197 };
2199
2200 if (data.base == NULL && data.direction != SUBPARSER_UNKNOWN_DIRECTION)
2201 error (WARNING, "Ignore the direction of subparser because \"{base=}\" is not given");
2202
2203 if (data.base && data.direction == SUBPARSER_UNKNOWN_DIRECTION)
2205
2206 def = OptlibParser (name, data.base, data.direction);
2207 if (data.base)
2208 eFree (data.base);
2209
2210 def->requestAutomaticFQTag = data.autoFQTag;
2211
2212 initializeParsingCommon (def, false);
2214
2219
2220 eFree (name);
2221}
2222
2223extern bool isLanguageKindEnabled (const langType language, int kindIndex)
2224{
2225 kindDefinition * kdef = getLanguageKind (language, kindIndex);
2226 return kdef->enabled;
2227}
2228
2229extern bool isLanguageRoleEnabled (const langType language, int kindIndex, int roleIndex)
2230{
2232 kindIndex, roleIndex);
2233}
2234
2235extern bool isLanguageKindRefOnly (const langType language, int kindIndex)
2236{
2237 kindDefinition * def = getLanguageKind(language, kindIndex);
2238 return def->referenceOnly;
2239}
2240
2241static void resetLanguageKinds (const langType language, const bool mode)
2242{
2243 const parserObject* parser;
2244
2245 Assert (0 <= language && language < (int) LanguageCount);
2246 parser = LanguageTable + language;
2247
2248 {
2249 unsigned int i;
2250 struct kindControlBlock *kcb = parser->kindControlBlock;
2251
2252 for (i = 0 ; i < countKinds (kcb) ; ++i)
2253 {
2254 kindDefinition *kdef = getKind (kcb, i);
2255 enableKind (kdef, mode);
2256 }
2257 }
2258}
2259
2261 const langType language, const int kind, const bool mode)
2262{
2263 bool result = false;
2264 kindDefinition* const def = getLanguageKindForLetter (language, kind);
2265 if (def != NULL)
2266 {
2267 enableKind (def, mode);
2268 result = true;
2269 }
2270 return result;
2271}
2272
2274 const langType language, const char * const name, const bool mode)
2275{
2276 bool result = false;
2277 kindDefinition* const def = getLanguageKindForName (language, name);
2278 if (def != NULL)
2279 {
2280 enableKind (def, mode);
2281 result = true;
2282 }
2283 return result;
2284}
2285
2287 const langType language, const char *const option,
2288 const char *const parameter)
2289{
2290 const char *p = parameter;
2291 bool mode = true;
2292 int c;
2293 static vString *longName;
2294 bool inLongName = false;
2295 const char *k;
2296 bool r;
2297
2298 Assert (0 <= language && language < (int) LanguageCount);
2299
2300 initializeParser (language);
2301 if (*p == '*')
2302 {
2303 resetLanguageKinds (language, true);
2304 p++;
2305 }
2306 else if (*p != '+' && *p != '-')
2307 resetLanguageKinds (language, false);
2308
2309 longName = vStringNewOrClearWithAutoRelease (longName);
2310
2311 while ((c = *p++) != '\0')
2312 {
2313 switch (c)
2314 {
2315 case '+':
2316 if (inLongName)
2317 vStringPut (longName, c);
2318 else
2319 mode = true;
2320 break;
2321 case '-':
2322 if (inLongName)
2323 vStringPut (longName, c);
2324 else
2325 mode = false;
2326 break;
2327 case '{':
2328 if (inLongName)
2329 error(FATAL,
2330 "unexpected character in kind specification: \'%c\'",
2331 c);
2332 inLongName = true;
2333 break;
2334 case '}':
2335 if (!inLongName)
2336 error(FATAL,
2337 "unexpected character in kind specification: \'%c\'",
2338 c);
2339 k = vStringValue (longName);
2340 r = enableLanguageKindForName (language, k, mode);
2341 if (! r)
2342 error (WARNING, "Unsupported kind: '%s' for --%s option",
2343 k, option);
2344
2345 inLongName = false;
2346 vStringClear (longName);
2347 break;
2348 default:
2349 if (inLongName)
2350 vStringPut (longName, c);
2351 else
2352 {
2353 r = enableLanguageKindForLetter (language, c, mode);
2354 if (! r)
2355 error (WARNING, "Unsupported kind: '%c' for --%s option",
2356 c, option);
2357 }
2358 break;
2359 }
2360 }
2361}
2362
2363static void freeKdef (kindDefinition *kdef)
2364{
2365 eFree (kdef->name);
2366 eFree (kdef->description);
2367 eFree (kdef);
2368}
2369
2370static char *extractDescriptionAndFlags(const char *input, const char **flags)
2371{
2372 vString *vdesc = vStringNew();
2373 bool escaped = false;
2374
2375 if (flags)
2376 *flags = NULL;
2377
2378 while (*input != '\0')
2379 {
2380 if (escaped)
2381 {
2382 vStringPut (vdesc, *input);
2383 escaped = false;
2384
2385 }
2386 else if (*input == '\\')
2387 escaped = true;
2388 else if (*input == LONG_FLAGS_OPEN)
2389 {
2390 if (flags)
2391 *flags = input;
2392 break;
2393 }
2394 else
2395 vStringPut (vdesc, *input);
2396 input++;
2397 }
2398 return vStringDeleteUnwrap(vdesc);
2399}
2400
2401static void pre_kind_def_flag_refonly_long (const char* const optflag,
2402 const char* const param, void* data)
2403{
2404 kindDefinition *kdef = data;
2405 kdef->referenceOnly = true;
2406}
2407
2409 { '\0', "_refonly", NULL, pre_kind_def_flag_refonly_long,
2410 NULL, "use this kind reference tags only"},
2411};
2412
/*
 * Handle the parameter of a "--kinddef-<LANG>" option for LANGUAGE.
 *
 * PARAMETER is parsed as "<letter>,<name>,<description>" where the
 * description part is handed to extractDescriptionAndFlags(), which also
 * yields an optional flags string.  On success a new kindDefinition is
 * allocated, filled in and registered in the parser's kindControlBlock
 * with freeKdef as its destructor.
 *
 * Malformed input is reported with error (FATAL, ...).  A letter or a name
 * that is already defined is reported with error (WARNING, ...) and the
 * function returns true without defining anything.  Every return visible
 * in this function returns true.
 *
 * NOTE(review): this listing has gaps (the lines following 2446 and 2522
 * are not visible), so the tail of two statements cannot be reviewed here.
 */
2413static bool processLangDefineKind(const langType language,
2414 const char *const option,
2415 const char *const parameter)
2416{
2417 parserObject *parser;
2418
2419 kindDefinition *kdef;
2420 char letter;
2421 const char * p = parameter;
2422 char *name;
2423 char *description;
2424 const char *name_start;
2425 const char *marker_end;
2426 size_t name_len;
2427 const char *flags;
2428
2429
2430 Assert (0 <= language && language < (int) LanguageCount);
2431 parser = LanguageTable + language;
2432
2433 Assert (p);
2434
2435 if (p[0] == '\0')
2436 error (FATAL, "no kind definition specified in \"--%s\" option", option);
2437
/* --- 1. the one-character kind letter --- */
2438 letter = p[0];
2439 if (letter == ',')
2440 error (FATAL, "no kind letter specified in \"--%s\" option", option);
2441 if (/* See #1697. isalnum expects 0~255 as the range of characters. */
2442 !isalpha ((unsigned char)letter)
2443 )
2444 error (FATAL, "the kind letter given in \"--%s\" option is not an alphabet", option);
2445 else if (letter == KIND_FILE_DEFAULT_LETTER)
2446 error (FATAL, "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind",
/* NOTE(review): the arguments of the error() call above are on a line
 * that is missing from this listing. */
2448 else if (getKindForLetter (parser->kindControlBlock, letter))
2449 {
2450 error (WARNING, "the kind for letter `%c' specified in \"--%s\" option is already defined.",
2451 letter, option);
2452 return true;
2453 }
2454
2455 if (p[1] != ',')
2456 error (FATAL, "wrong kind definition in \"--%s\" option: no comma after letter", option);
2457
/* --- 2. the kind name, terminated by the next comma --- */
2458 p += 2;
2459 if (p[0] == '\0')
2460 error (FATAL, "no kind name specified in \"--%s\" option", option);
2461 marker_end = strchr (p, ',');
2462 if (!marker_end)
2463 error (FATAL, "no kind description specified in \"--%s\" option", option);
2464
/* The name must start with a letter and continue with letters or digits.
 * NOTE(review): isalpha()/isalnum() below receive a plain char, while the
 * letter check above casts to unsigned char; a char with the high bit set
 * would be passed as a negative value here.  Consider the same cast. */
2465 name_start = p;
2466 while (p != marker_end)
2467 {
2468 if (p == name_start)
2469 {
2470 if (!isalpha(*p))
2471 {
2472 char *name_in_msg = eStrndup (name_start, marker_end - name_start);
2473 error (FATAL,
2474 "a kind name doesn't start with an alphabetical character: "
2475 "'%s' in \"--%s\" option",
2476 name_in_msg, option);
2477 }
2478 }
2479 else
2480 {
2481 if (!isalnum (*p))
2482 {
2483 char *name_in_msg = eStrndup (name_start, marker_end - name_start);
2484 error (FATAL,
2485 "non-alphanumeric char is used as part of kind name: "
2486 "'%s' in \"--%s\" option",
2487 name_in_msg, option);
2488 }
2489 }
2490 p++;
2491 }
2492
/* The loop above does not run for an empty name, so check it explicitly. */
2493 if (marker_end == name_start)
2494 error (FATAL, "the kind name in \"--%s\" option is empty", option);
2495
/* NOTE(review): strncmp() is limited to name_len characters, so any name
 * that is a proper prefix of KIND_FILE_DEFAULT_NAME also compares equal
 * and is rejected as reserved.  Presumably an exact match was intended;
 * confirm and also compare the lengths if so. */
2496 name_len = marker_end - name_start;
2497 if (strncmp (name_start, KIND_FILE_DEFAULT_NAME, name_len) == 0)
2498 error (FATAL,
2499 "the kind name " KIND_FILE_DEFAULT_NAME " in \"--%s\" option is reserved",
2500 option);
2501
2502 name = eStrndup (name_start, name_len);
2503 if (getKindForName (parser->kindControlBlock, name))
2504 {
2505 error (WARNING, "the kind for name `%s' specified in \"--%s\" option is already defined.",
2506 name, option);
2507 eFree (name);
2508 return true;
2509 }
2510
/* --- 3. the description and the optional flags --- */
/* After the loop p == marker_end (the comma); step over it. */
2511 p++;
2512 if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
2513 error (FATAL, "found an empty kind description in \"--%s\" option", option);
2514
2515 description = extractDescriptionAndFlags (p, &flags);
2516
/* name and description are handed over to kdef; freeKdef is registered
 * below as the destructor for the definition. */
2517 kdef = xCalloc (1, kindDefinition);
2518 kdef->enabled = true;
2519 kdef->letter = letter;
2520 kdef->name = name;
2521 kdef->description = description;
2522 if (flags)
/* NOTE(review): the statement controlled by "if (flags)" is on a line
 * that is missing from this listing. */
2524
2525 defineKind (parser->kindControlBlock, kdef, freeKdef);
2526 return true;
2527}
2528
2529static void freeRdef (roleDefinition *rdef)
2530{
2531 eFree (rdef->name);
2532 eFree (rdef->description);
2533 eFree (rdef);
2534}
2535
/*
 * Handle the parameter of a "--_roledef-<LANG>.<kindSpec>" option.
 *
 * KINDSPEC selects the kind the new role is attached to: either
 * "{kindname}" or a single kind letter.  PARAMETER is parsed as
 * "<rolename>,<description>", the description part going through
 * extractDescriptionAndFlags().  On success a roleDefinition is allocated
 * and registered for the kind with freeRdef as its destructor.
 *
 * Malformed input and unknown kinds are reported with error (FATAL, ...).
 * A role name that already exists for the kind produces a WARNING and the
 * function returns true without defining anything.  Every return visible
 * in this function returns true.
 *
 * NOTE(review): this listing has gaps (the lines following 2564 and 2579
 * are not visible).
 */
2536static bool processLangDefineRole(const langType language,
2537 const char *const kindSpec,
2538 const char *const option,
2539 const char *const parameter)
2540{
2541 parserObject *parser;
2542
2543 kindDefinition *kdef;
2544 roleDefinition *rdef;
2545 char *name;
2546 char *description;
2547
2548 Assert (0 <= language && language < (int) LanguageCount);
2549 Assert (parameter);
2550
2551 parser = LanguageTable + language;
2552
/* --- 1. resolve the kind: "{name}" form --- */
2553 if (*kindSpec == '{')
2554 {
2555 const char *end = strchr (kindSpec, '}');
2556 if (end == NULL)
2557 error (FATAL, "no '}' representing the end of kind name in --%s option: %s",
2558 option, kindSpec);
2559 if (*(end + 1) != '\0')
2560 error (FATAL, "garbage after the kind specification %s in --%s option",
2561 kindSpec, option);
/* Copy the text between the braces; released with eFree() below. */
2562 char *kindName = eStrndup (kindSpec + 1, end - (kindSpec + 1));
2563 if (strcmp (kindName, KIND_FILE_DEFAULT_NAME) == 0)
2564 error (FATAL, "don't define a role for %c/%s kind; it has no role: --%s",
/* NOTE(review): the first arguments of the error() call above are on a
 * line that is missing from this listing. */
2566 option);
2567 kdef = getKindForName (parser->kindControlBlock, kindName);
2568 if (kdef == NULL)
2569 error (FATAL, "the kind for name `%s' specified in \"--%s\" option is not defined.",
2570 kindName, option);
2571 eFree (kindName);
2572 }
/* --- 1'. resolve the kind: single-letter form --- */
2573 else
2574 {
2575 char kletter = *kindSpec;
2576 if (!isalnum ((unsigned char)kletter))
2577 error (FATAL, "the kind letter given in \"--%s\" option is not an alphabet or a number", option);
2578 else if (kletter == KIND_FILE_DEFAULT_LETTER)
2579 error (FATAL, "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind, and no role can be attached to it",
/* NOTE(review): the arguments of the error() call above are on a line
 * that is missing from this listing. */
2581 else if (*(kindSpec + 1) != '\0')
2582 error (FATAL, "more than one letters are specified as a kind spec in \"--%s\" option: use `{' and `}' for specifying a kind name",
2583 option);
2584
2585 kdef = getKindForLetter (parser->kindControlBlock, kletter);
2586 if (kdef == NULL)
2587 {
2588 error (FATAL, "the kind for letter `%c' specified in \"--%s\" option is not defined.",
2589 *kindSpec, option);
/* NOTE(review): presumably unreachable if error (FATAL, ...) does not
 * return; the "{name}" branch above has no such return.  Confirm. */
2590 return true;
2591 }
2592 }
2593
/* --- 2. the role name, terminated by the first comma --- */
2594 const char * p = parameter;
2595 const char *tmp_end = strchr (p, ',');
2596 if (!tmp_end)
2597 error (FATAL, "no role description specified in \"--%s\" option", option);
2598
/* NOTE(review): isalnum() receives a plain char here, while the kind
 * letter check above casts to unsigned char.  Consider the same cast. */
2599 const char * tmp_start = p;
2600 while (p != tmp_end)
2601 {
2602 if (!isalnum (*p))
2603 error (FATAL, "unacceptable char as part of role name in \"--%s\" option: %c",
2604 option, *p);
2605 p++;
2606 }
2607
2608 if (tmp_end == tmp_start)
2609 error (FATAL, "the role name in \"--%s\" option is empty", option);
2610
2611 name = eStrndup (tmp_start, tmp_end - tmp_start);
2612 if (getRoleForName (parser->kindControlBlock, kdef->id, name))
2613 {
2614 error (WARNING, "the role for name `%s' specified in \"--%s\" option is already defined.",
2615 name, option);
2616 eFree (name);
2617 return true;
2618 }
2619
/* --- 3. the description and the optional flags --- */
/* After the loop p == tmp_end (the comma); step over it. */
2620 p++;
2621 if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
2622 error (FATAL, "found an empty role description in \"--%s\" option", option);
2623
2624 const char *flags;
2625 description = extractDescriptionAndFlags (p, &flags);
2626
/* name and description are handed over to rdef; freeRdef releases them. */
2627 rdef = xCalloc (1, roleDefinition);
2628 rdef->enabled = true;
2629 rdef->name = name;
2630 rdef->description = description;
2631
/* The flags are evaluated against an empty flag table (NULL, 0). */
2632 if (flags)
2633 flagsEval (flags, NULL, 0, rdef);
2634
2635 defineRole (parser->kindControlBlock, kdef->id, rdef, freeRdef);
2636
2637 return true;
2638}
2639
2640extern bool processKinddefOption (const char *const option, const char * const parameter)
2641{
2642 langType language;
2643
2644 language = getLanguageComponentInOption (option, "kinddef-");
2645 if (language == LANG_IGNORE)
2646 return false;
2647
2648 return processLangDefineKind (language, option, parameter);
2649}
2650
2651extern bool processRoledefOption (const char *const option, const char * const parameter)
2652{
2653#define PREFIX "_roledef-"
2654#define PREFIX_LEN strlen(PREFIX)
2655
2656 langType language = getLanguageComponentInOption (option, PREFIX);
2657 if (language == LANG_IGNORE)
2658 return false;
2659
2660 Assert (0 <= language && language < (int) LanguageCount);
2661 const char* kindSpec = option + PREFIX_LEN + strlen (getLanguageName (language));
2662 if (*kindSpec == '\0')
2663 error (FATAL, "no kind is specifined in \"--%s=%s\"", option, parameter);
2664 if (*kindSpec != '.')
2665 error (FATAL, "no delimiter (.) where a kindspec starts is found in \"--%s\": %c",
2666 option, *kindSpec);
2667 kindSpec++;
2668
2669 return processLangDefineRole (language, kindSpec, option, parameter);
2670#undef PREFIX
2671#undef PREFIX_LEN
2672}
2673
2675 const char *const option;
2676 const char *const parameter;
2677};
2679 langType lang, void* user_data)
2680{
2681 struct langKindDefinitionStruct *arg = user_data;
2682 processLangKindDefinition (lang, arg->option, arg->parameter);
2683}
2684
/*
 * Recursively scan PARAMETER, the argument of a kinds option applying to
 * all languages, one element at a time.  FOLLOWING_PLUS_OR_MINUS_OP tells
 * whether the previous element was a '+' or '-' operator; in that state an
 * end of string, another operator or a '*' is reported with
 * error (FATAL, ...).
 *
 * Returns true when the end of PARAMETER is reached outside that state,
 * and false for any character not handled by the switch, including a
 * "{...}" element other than "{" KIND_FILE_DEFAULT_NAME "}".
 *
 * NOTE(review): this listing has gaps (lines 2708, 2709, 2715 and 2720 are
 * not visible), so at least one statement and presumably some case labels
 * of the switch cannot be reviewed here.
 */
2685static bool parameterEnablingAllOrFileKind (const char *const option,
2686 const char *const parameter,
2687 bool following_plus_or_minus_op)
2688{
2689 size_t file_long_flag_len = strlen(KIND_FILE_DEFAULT_NAME);
2690
2691 switch (parameter[0])
2692 {
2693 /* Though only '*' is documented as an acceptable kind spec for
2694 * --kinds-all option in our man page, we accept '\0' here because
2695 * it will be useful for testing purpose. */
2696 case '\0':
/* NOTE(review): there is no break after this case; if error (FATAL, ...)
 * could return, control would fall through into the '+'/'-' case.
 * Presumably FATAL does not return; confirm. */
2697 if (following_plus_or_minus_op)
2698 error(FATAL, "no kind specification after + (or -) in --%s option",
2699 option);
2700 else
2701 return true;
2702 case '+':
2703 case '-':
2704 if (following_plus_or_minus_op)
2705 error(FATAL, "don't repeat + (nor -) in --%s option",
2706 option);
2707 else
/* NOTE(review): the else-branch statement and whatever followed it
 * (presumably a case label for '*') are missing from this listing. */
2710 if (following_plus_or_minus_op)
2711 error(FATAL, "don't use '*' after + (nor -) in --%s option",
2712 option);
2713 else
2714 return parameterEnablingAllOrFileKind (option, parameter + 1, false);
/* NOTE(review): a line is missing from this listing here (presumably
 * another case label). */
2716 return parameterEnablingAllOrFileKind (option, parameter + 1, false);
2717 case '{':
/* Accept only "{" KIND_FILE_DEFAULT_NAME "}" and continue after the '}'. */
2718 if (strncmp (parameter + 1, KIND_FILE_DEFAULT_NAME, file_long_flag_len) == 0
2719 && parameter [1 + file_long_flag_len] == '}')
/* NOTE(review): the beginning of the statement continued on the next two
 * lines is missing from this listing. */
2721 parameter + 1 + file_long_flag_len + 1,
2722 false);
2723 break;
2724 }
2725 return false;
2726}
2727
2729 const char *const option, const char *const parameter)
2730{
2731#define PREFIX "kinds-"
2732#define PREFIX_LEN strlen(PREFIX)
2733
2734 bool handled = false;
2735 struct langKindDefinitionStruct arg = {
2736 .option = option,
2737 .parameter = parameter,
2738 };
2739 langType language;
2740
2741 const char* const dash = strchr (option, '-');
2742 if (dash != NULL &&
2743 (strcmp (dash + 1, "kinds") == 0 || strcmp (dash + 1, "types") == 0))
2744 {
2745 size_t len = dash - option;
2746 char *langName = eStrndup (option, len);
2747
2748 if ((len == 3) && (strcmp (langName, RSV_LANG_ALL) == 0))
2749 {
2750 error (WARNING,
2751 "\"--%s\" option is obsolete; use \"--kinds-%s\" instead",
2752 option, langName);
2754 error (FATAL, "only '*', 'F', \"{file}\" or their combination is acceptable as kind letter for --%s", option);
2756 }
2757 else
2758 {
2759 language = getNamedLanguage (langName, 0);
2760 if (language == LANG_IGNORE)
2761 error (WARNING, "Unknown language \"%s\" in \"%s\" option", langName, option);
2762 else
2764 }
2765 eFree (langName);
2766 handled = true;
2767 }
2768 else if ( strncmp (option, PREFIX, PREFIX_LEN) == 0 )
2769 {
2770 const char* lang;
2771
2772 lang = option + PREFIX_LEN;
2773 if (lang[0] == '\0')
2774 error (WARNING, "No language given in \"%s\" option", option);
2775 else if (strcmp (lang, RSV_LANG_ALL) == 0)
2776 {
2778 error (FATAL, "only '*', 'F', \"{file}\" or their combination is acceptable as kind letter for --%s", option);
2780 }
2781 else
2782 {
2783 language = getNamedLanguage (lang, 0);
2784 if (language == LANG_IGNORE)
2785 error (WARNING, "Unknown language \"%s\" in \"%s\" option", lang, option);
2786 else
2788 }
2789 handled = true;
2790 }
2791 return handled;
2792#undef PREFIX
2793#undef PREFIX_LEN
2794}
2795
2796/*
2797 * The argument specification for --roles-<LANG>.<KIND>= option
2798 * =====================================================================
2799 *
2800 * --roles-all.*=
2801 * --roles-all=
2802 * => Disable all roles of all kinds in all languages.
2803 *
2804 * --roles-all.*='*'
2805 * --roles-all='*'
2806 * => Enable all roles of all kinds in all languages.
2807 *
2808 * --roles-<LANG>.*=
2809 * --roles-<LANG>=
2810 * => Disable all roles of all kinds.
2811 *
2812 * --roles-<LANG>.*=*
2813 * --roles-<LANG>=*
2814 * => Enable all roles of all kinds.
2815 *
2816 * --roles-<LANG>.{kind}=
2817 * --roles-<LANG>.k=
2818 * => Disable all roles of the kind specified with a letter.
2819 *
2820 * --roles-<LANG>.{kind}=*
2821 * --roles-<LANG>.k=*
2822 * => Enable all roles of the kind specified with a letter.
2823 *
2824 * --roles-<LANG>.{kind}=[+|-]{role}
2825 * --roles-<LANG>.k=[+|-]{role}
2826 * => Enable/disable the role of the kind specified with a letter.
2827 *
2828 *
2829 * Examples of combination
2830 * ---------------------------------------------------------------------
2831 *
2832 * --roles-<LANG>.k0=+{role0}-{role1}{role2}
2833 * --roles-<LANG>.{kind1}=+{role0}-{role1}{role2}
2834 *
2835 *
2836 * How --roledef should be changed to align with the --roles-<LANG> notation
2837 * ---------------------------------------------------------------------
2838 *
2839 * --_roledef-<LANG>.k=role,description
2840 * --_roledef-<LANG>.{kind}=role,description
2841 *
2842 * The notation was
2843 * --_roledef-<LANG>=k.role,description
2844 *
2845 *
2846 * How --param should be changed to align with the --roles-<LANG> notation
2847 * ---------------------------------------------------------------------
2848 *
2849 * --_param-<LANG>.name=argument
2850 *
2851 * * The notation was
2852 * --_param-<LANG>:name=argument
2853 *
2854 *
2855 * How --paramdef should look to align with the --roles-<LANG> notation
2856 * ---------------------------------------------------------------------
2857 *
2858 * --_paramdef-<LANG>.name=[ default (description) ]
2859 *
2860 *
2861 * Discussion: which should we use, `.' or `:'?
2862 * ---------------------------------------------------------------------
2863 *
2864 * `.' is better because `:' implies fields.
2865 *
2866 */
2869 const char *const option;
2870 const char *const parameter;
2871};
2872
2873typedef void (*kindCallback) (langType language, int kindIndex, void* user_data);
2874static void foreachKind(langType language, kindCallback callback, void *user_data)
2875{
2876 unsigned int c = countLanguageKinds (language);
2877 for (unsigned int i = 0; i < c; i++)
2878 callback (language, (int)i, user_data);
2879}
2880
2881static void resetKindRoles (const langType language, int kindIndex, const bool mode)
2882{
2883 Assert (0 <= language && language < (int) LanguageCount);
2884 unsigned int c = countLanguageRoles (language, kindIndex);
2885
2886 for (unsigned int i = 0; i < c; i++)
2887 {
2888 roleDefinition* rdef = getLanguageRole (language, kindIndex, (int)i);
2889 enableRole (rdef, mode);
2890 }
2891}
2892
2893static void resetKindRolesAsCallback (const langType language, int kindIndex, void *user_data)
2894{
2895 bool mode = (bool)user_data;
2896 resetKindRoles (language, kindIndex, mode);
2897}
2898
2900 const langType language, const int kindIndex, const char *const option,
2901 const char *const parameter)
2902{
2903 Assert (0 <= language && language < (int) LanguageCount);
2904 Assert (kindIndex != KIND_GHOST_INDEX);
2905 initializeParser (language);
2906
2907 const char *p = parameter;
2908 bool mode = true;
2909
2910 if (*p == '\0')
2911 {
2912 resetKindRoles (language, kindIndex, false);
2913 return;
2914 }
2915 else if (*p != '+' && *p != '-' )
2916 resetKindRoles (language, kindIndex, false);
2917
2918 while (*p != '\0')
2919 {
2920 if (*p == '+')
2921 {
2922 mode = true;
2923 p++;
2924 }
2925 else if (*p == '-')
2926 {
2927 mode = false;
2928 p++;
2929 }
2930 else if (*p == '{')
2931 {
2932 p++;
2933 char *q = strchr (p, '}');
2934 if (!q)
2935
2936 error (FATAL, "no '}' representing the end of role name in --%s option: %s",
2937 option, p);
2938 if (p == q)
2939 error (FATAL, "empty role for the kind letter: %c",
2940 getLanguageKind (language, kindIndex)->letter);
2941
2942 char *rname = eStrndup (p, q - p);
2943 roleDefinition *rdef = getLanguageRoleForName (language, kindIndex, rname);
2944 if (!rdef)
2945 error (WARNING, "no such role: %s of %c kind in language %s",
2946 rname, getLanguageKind (language, kindIndex)->letter,
2947 getLanguageName (language));
2948 else
2949 enableRole (rdef, mode);
2950 eFree (rname);
2951 p = q + 1;
2952 }
2953 else if (*p == '*')
2954 {
2955 resetKindRoles (language, kindIndex, true);
2956 p++;
2957 }
2958 else
2959 error (FATAL, "unexpected character %c in --%s=%s option",
2960 *p, option, parameter);
2961 }
2962}
2963
/*
 * Per-language callback.  USER_DATA points to a
 * struct langKindRoleDefinitionStruct carrying the kind index, the option
 * name and its parameter.
 *
 * When the kind index is KIND_GHOST_INDEX the parser is initialized first
 * and a true/false value, chosen by whether the parameter starts with
 * '*', is passed on as a pointer-sized argument.
 *
 * NOTE(review): lines 2971 and 2975 are missing from this listing, so the
 * function called in either branch is not visible here.
 */
2964static void processLangKindRoleDefinitionEach (langType language, void* user_data)
2965{
2966 struct langKindRoleDefinitionStruct *arg = user_data;
2967
2968 if (arg->kindIndex == KIND_GHOST_INDEX)
2969 {
2970 initializeParser (language);
/* NOTE(review): the beginning of the call completed on the next line is
 * missing from this listing. */
2972 ((*(arg->parameter) == '*')? (void *)true: (void *)false));
2973 }
2974 else
/* NOTE(review): the beginning of the call completed on the next line is
 * missing from this listing. */
2976 arg->option, arg->parameter);
2977}
2978
/*
 * Entry point for "--roles-<LANG>[.<kindSpec>]=<parameter>" options.
 *
 * Returns false when OPTION does not start with "roles-".  In every other
 * visible path the option is treated as handled and true is returned;
 * recoverable problems (no language, unknown language, unknown kind, the
 * file kind) are reported with error (WARNING, ...), malformed input with
 * error (FATAL, ...).
 *
 * NOTE(review): several lines of this function are missing from this
 * listing (3012, 3016, 3062, 3074, 3086, 3091, 3107, 3111, 3112, 3138);
 * the calls that actually apply the role changes are among them.
 */
2979extern bool processRolesOption (const char *const option, const char *const parameter)
2980{
2981#define PREFIX "roles-"
2982#define PREFIX_LEN strlen(PREFIX)
2983
2984 if ( strncmp (option, PREFIX, PREFIX_LEN) != 0 )
2985 return false;
2986
2987 const char* lang = option + PREFIX_LEN;
2988 if (lang[0] == '\0')
2989 {
2990 error (WARNING, "no language given in \"%s\" option", option);
2991 return true;
2992 }
2993
2994 /*
2995 * --roles-all.*=
2996 * --roles-all=
2997 * => Disable all roles of all kinds in all languages.
2998 *
2999 * --roles-all.*='*'
3000 * --roles-all='*'
3001 * => Enable all roles of all kinds in all languages.
3002 */
3003 if (strncmp (lang, RSV_LANG_ALL, strlen(RSV_LANG_ALL)) == 0)
3004 {
3005 if (lang [strlen (RSV_LANG_ALL)] == '\0'
3006 || (strcmp (lang + strlen (RSV_LANG_ALL), ".*") == 0))
3007 {
3008 if (*parameter == '\0'
3009 || strcmp(parameter, "*") == 0)
3010 {
3011 struct langKindRoleDefinitionStruct arg = {
/* NOTE(review): one initializer line is missing from this listing. */
3013 .option = option,
3014 .parameter = parameter,
3015 };
/* NOTE(review): the statement that uses arg is missing from this listing. */
3017 return true;
3018 }
3019 else
3020 error (FATAL, "only '*' or '' (empty string) is acceptable as an argument for --%s: %s",
3021 option,
3022 parameter);
3023 }
3024 else if (lang [strlen(RSV_LANG_ALL)] == '.')
3025 error (FATAL, "only '*' or '' (empty string) is acceptable as a kind spec for --%sall: --%s",
3026 PREFIX,
3027 option);
/* A name that merely starts with RSV_LANG_ALL and is followed by neither
 * the end of the string nor '.' is handled below as a language name. */
3028 }
3029
3030 /* Decide the language. */
3031 langType language;
3032 const char *dot = strchr (lang, '.');
3033 if (dot)
3034 language = getNamedLanguage (lang, dot - lang);
3035 else
3036 language = getNamedLanguage (lang, 0);
3037
3038 if (language == LANG_IGNORE)
3039 {
/* Copy just the language part so the warning does not include ".<kind>". */
3040 char *lang0 = dot? eStrndup (lang, dot - lang): NULL;
3041 error (WARNING, "unknown language \"%s\" in --%s option",
3042 (lang0? lang0: lang), option);
3043 if (lang0)
3044 eFree (lang0);
3045 return true;
3046 }
3047
3048 /*
3049 * --roles-<LANG>.*=
3050 * --roles-<LANG>=
3051 * => Disable all roles of all kinds.
3052 *
3053 * --roles-<LANG>.*=*
3054 * --roles-<LANG>=*
3055 * => Enable all roles of all kinds.
3056 */
3057 if (dot == NULL || (strcmp (dot, ".*") == 0))
3058 {
3059 if (*parameter == '\0'
3060 || strcmp(parameter, "*") == 0)
3061 {
/* NOTE(review): the beginning of the call completed on the next line is
 * missing from this listing. */
3063 ((*parameter == '*')? (void*)true: (void*)false));
3064 return true;
3065 }
3066 else
3067 error (FATAL, "only '*' or '' (empty string) is acceptable as an argument for --%s: %s",
3068 option,
3069 parameter);
3070 }
3071
3072 /* Decide the kind of the language. */
3073 parserObject *parser = LanguageTable + language;
/* NOTE(review): a line is missing from this listing here; presumably it
 * declares kindIndex, which is assigned below. */
3075 const char *kind = dot + 1;
3076 if (*kind == '{')
3077 {
3078 const char *name_end = strchr (kind, '}');
3079 if (name_end == NULL)
3080 error (FATAL, "no '}' representing the end of kind name in --%s option: %s",
3081 option, kind);
3082 char *kindName = eStrndup (kind + 1, name_end - (kind + 1));
3083 if (strcmp (kindName, KIND_FILE_DEFAULT_NAME) == 0)
3084 {
3085 error (WARNING, "don't enable/disable a role in %c/%s kind; it has no role: --%s",
/* NOTE(review): the first arguments of the error() call above are on a
 * line that is missing from this listing. */
3087 option);
/* NOTE(review): kindName, allocated by eStrndup() above, is not released
 * on this return path, unlike the other exits of this branch below. */
3088 return true;
3089 }
3090 kindIndex = getKindIndexForName (parser->kindControlBlock, kindName);
/* NOTE(review): the condition guarding the next block is missing from
 * this listing. */
3092 {
3093 eFree (kindName);
3094 error (WARNING, "no such kind name as specified in --%s option", option);
3095 return true;
3096 }
3097 if (*(name_end + 1) != '\0')
3098 error (FATAL, "garbage after the kind specification {%s} in --%s option",
3099 kindName, option);
3100 eFree (kindName);
3101 }
3102 else if (isalpha ((unsigned char)*kind))
3103 {
3104 if (*kind == KIND_FILE_DEFAULT_LETTER)
3105 {
3106 error (WARNING, "don't enable/disable a role in %c/%s kind; it has no role: --%s",
/* NOTE(review): the first arguments of the error() call above are on a
 * line that is missing from this listing. */
3108 option);
3109 return true;
3110 }
/* NOTE(review): the lookup of the kind for the letter and the condition
 * guarding the next block are missing from this listing. */
3113 {
3114 error (WARNING, "no such kind letter as specified in --%s option", option);
3115 return true;
3116 }
3117 if (*(kind + 1) != '\0')
3118 error (FATAL, "garbage after the kind specification '%c' in --%s option",
3119 *kind, option);
3120 }
3121 else
3122 error (FATAL, "'%c', unexpected character in --%s", *kind, option);
3123
3124
3125 /*
3126 * --roles-<LANG>.{kind}=
3127 * --roles-<LANG>.k=
3128 * => Disable all roles of the kind specified with a letter.
3129 *
3130 * --roles-<LANG>.{kind}=*
3131 * --roles-<LANG>.k=*
3132 * => Enable all roles of the kind specified with a letter.
3133 *
3134 * --roles-<LANG>.{kind}=[+|-|]{role}
3135 * --roles-<LANG>.k=[+|-|]{role}
3136 * => Enable/disable the role of the kind specified with a letter.
3137 */
/* NOTE(review): the statement implementing the comment above is missing
 * from this listing. */
3139
3140 return true;
3141#undef PREFIX
3142#undef PREFIX_LEN
3143}
3144
3145extern void printLanguageRoles (const langType language, const char* kindspecs,
3146 bool withListHeader, bool machinable, FILE *fp)
3147{
3148 struct colprintTable *table = roleColprintTableNew();
3149 parserObject *parser;
3150
3151 initializeParser (language);
3152
3153 if (language == LANG_AUTO)
3154 {
3155 for (unsigned int i = 0 ; i < LanguageCount ; ++i)
3156 {
3157 if (!isLanguageVisible (i))
3158 continue;
3159
3160 parser = LanguageTable + i;
3161 roleColprintAddRoles (table, parser->kindControlBlock, kindspecs);
3162 }
3163 }
3164 else
3165 {
3166 parser = LanguageTable + language;
3167 roleColprintAddRoles (table, parser->kindControlBlock, kindspecs);
3168 }
3169
3170 roleColprintTablePrint (table, (language != LANG_AUTO),
3171 withListHeader, machinable, fp);
3172 colprintTableDelete (table);
3173}
3174
3175static void printKinds (langType language, bool indent,
3176 struct colprintTable * table)
3177{
3178 const parserObject *parser;
3179 struct kindControlBlock *kcb;
3180 Assert (0 <= language && language < (int) LanguageCount);
3181
3182 initializeParser (language);
3183 parser = LanguageTable + language;
3184 kcb = parser->kindControlBlock;
3185
3186 if (table)
3187 kindColprintAddLanguageLines (table, kcb);
3188 else
3189 {
3190 for (unsigned int i = 0 ; i < countKinds(kcb) ; ++i)
3191 printKind (getKind(kcb, i), indent);
3192 }
3193}
3194
3195extern void printLanguageKinds (const langType language, bool allKindFields,
3196 bool withListHeader, bool machinable, FILE *fp)
3197{
3198 struct colprintTable * table = NULL;
3199
3200 if (allKindFields)
3201 table = kindColprintTableNew ();
3202
3203 if (language == LANG_AUTO)
3204 {
3205 for (unsigned int i = 0 ; i < LanguageCount ; ++i)
3206 {
3207 const parserDefinition* const lang = LanguageTable [i].def;
3208
3209 if (lang->invisible)
3210 continue;
3211
3212 if (!table)
3213 printf ("%s%s\n", lang->name, isLanguageEnabled (i) ? "" : " [disabled]");
3214 printKinds (i, true, table);
3215 }
3216 }
3217 else
3218 printKinds (language, false, table);
3219
3220 if (allKindFields)
3221 {
3222 kindColprintTablePrint(table, (language == LANG_AUTO)? 0: 1,
3223 withListHeader, machinable, fp);
3224 colprintTableDelete (table);
3225 }
3226}
3227
3228static void printParameters (struct colprintTable *table, langType language)
3229{
3230 const parserDefinition* lang;
3231 Assert (0 <= language && language < (int) LanguageCount);
3232
3233 initializeParser (language);
3234 lang = LanguageTable [language].def;
3235 if (lang->parameterHandlerTable != NULL)
3236 {
3237 for (unsigned int i = 0; i < lang->parameterHandlerCount; ++i)
3238 paramColprintAddParameter(table, language, lang->parameterHandlerTable + i);
3239 }
3240
3241}
3242
3243extern void printLanguageParameters (const langType language,
3244 bool withListHeader, bool machinable, FILE *fp)
3245{
3246 struct colprintTable *table = paramColprintTableNew();
3247
3248 if (language == LANG_AUTO)
3249 {
3250 for (unsigned int i = 0; i < LanguageCount ; ++i)
3251 {
3252 const parserDefinition* const lang = LanguageTable [i].def;
3253
3254 if (lang->invisible)
3255 continue;
3256
3257 printParameters (table, i);
3258 }
3259 }
3260 else
3261 printParameters (table, language);
3262
3263 paramColprintTablePrint (table, (language != LANG_AUTO),
3264 withListHeader, machinable, fp);
3265 colprintTableDelete (table);
3266}
3267
/*
 * Apply the parameter of an alias option to LANGUAGE.
 *
 *   ""                    clear all aliases
 *   RSV_LANGMAP_DEFAULT   reset the aliases (statement not visible, see
 *                         the note below)
 *   "+<alias>"            add an alias to the current ones
 *   "-<alias>"            remove an alias, if the language has any
 *   "<alias>"             replace all aliases by this single one
 *
 * Each action performed is reported through verbose().
 *
 * NOTE(review): line 3284 is missing from this listing.
 */
3268static void processLangAliasOption (const langType language,
3269 const char *const parameter)
3270{
3271 const char* alias;
3272 const parserObject * parser;
3273
3274 Assert (0 <= language && language < (int) LanguageCount);
3275 parser = LanguageTable + language;
3276
3277 if (parameter[0] == '\0')
3278 {
3279 clearLanguageAliases (language);
3280 verbose ("clear aliases for %s\n", parser->def->name);
3281 }
3282 else if (strcmp (parameter, RSV_LANGMAP_DEFAULT) == 0)
3283 {
/* NOTE(review): the statement performing the reset is on a line that is
 * missing from this listing. */
3285 verbose ("reset aliases for %s\n", parser->def->name);
3286 }
3287 else if (parameter[0] == '+')
3288 {
3289 alias = parameter + 1;
3290 addLanguageAlias(language, alias);
3291 verbose ("add an alias %s to %s\n", alias, parser->def->name);
3292 }
3293 else if (parameter[0] == '-')
3294 {
/* Nothing is done, and nothing is reported, when the language has no
 * alias list or the alias is not found in it. */
3295 if (parser->currentAliases)
3296 {
3297 alias = parameter + 1;
3298 if (stringListDeleteItemExtension (parser->currentAliases, alias))
3299 {
3300 verbose ("remove an alias %s from %s\n", alias, parser->def->name);
3301 }
3302 }
3303 }
3304 else
3305 {
3306 alias = parameter;
3307 clearLanguageAliases (language);
3308 addLanguageAlias(language, alias);
3309 verbose ("set alias %s to %s\n", alias, parser->def->name);
3310 }
3311
3312}
3313
3315 const char *const option, const char *const parameter)
3316{
3317 langType language;
3318
3319 Assert (parameter);
3320
3321#define PREFIX "alias-"
3322 if (strcmp (option, "alias-" RSV_LANG_ALL) == 0)
3323 {
3324 if ((parameter[0] == '\0')
3325 || (strcmp (parameter, RSV_LANGMAP_DEFAULT) == 0))
3326 {
3327 for (unsigned int i = 0; i < LanguageCount; i++)
3328 {
3330 verbose ("clear aliases for %s\n", getLanguageName(i));
3331 }
3332
3333 if (parameter[0] != '\0')
3334 {
3335 verbose (" Installing default language aliases:\n");
3337 }
3338 }
3339 else
3340 {
3341 error (WARNING, "Use \"%s\" option for reset (\"default\") or clearing (\"\")", option);
3342 return false;
3343 }
3344 return true;
3345 }
3346
3347 language = getLanguageComponentInOption (option, "alias-");
3348 if (language == LANG_IGNORE)
3349 return false;
3350#undef PREFIX
3351
3352 processLangAliasOption (language, parameter);
3353 return true;
3354}
3355
3356static void printMaps (const langType language, langmapType type)
3357{
3358 const parserObject* parser;
3359 unsigned int i;
3360
3361 parser = LanguageTable + language;
3362 printf ("%-8s", parser->def->name);
3363 if (parser->currentPatterns != NULL && (type & LMAP_PATTERN))
3364 for (i = 0 ; i < stringListCount (parser->currentPatterns) ; ++i)
3365 printf (" %s", vStringValue (
3366 stringListItem (parser->currentPatterns, i)));
3367 if (parser->currentExtensions != NULL && (type & LMAP_EXTENSION))
3368 for (i = 0 ; i < stringListCount (parser->currentExtensions) ; ++i)
3369 printf (" *.%s", vStringValue (
3370 stringListItem (parser->currentExtensions, i)));
3371 putchar ('\n');
3372}
3373
3375{
3376 if ((type & LMAP_ALL) == LMAP_ALL)
3377 return colprintTableNew ("L:LANGUAGE", "L:TYPE", "L:MAP", NULL);
3378 else if (type & LMAP_PATTERN)
3379 return colprintTableNew ("L:LANGUAGE", "L:PATTERN", NULL);
3380 else if (type & LMAP_EXTENSION)
3381 return colprintTableNew ("L:LANGUAGE", "L:EXTENSION", NULL);
3382 else
3383 {
3385 return NULL;
3386 }
3387}
3388
/*
 * Add one table line per pattern (when TYPE has LMAP_PATTERN) and one per
 * extension (when TYPE has LMAP_EXTENSION) of PARSER to TABLE.
 *
 * NOTE(review): the lines that fill in the columns of each new line
 * (3404, 3406-3407, 3418, 3420-3421) are missing from this listing.
 *
 * NOTE(review): stringListCount() is called on currentPatterns and
 * currentExtensions without the NULL check that printMaps() performs on
 * the same members; whether stringListCount() accepts NULL is not visible
 * here.  Confirm.
 */
3389static void mapColprintAddLanguage (struct colprintTable * table,
3390 langmapType type,
3391 const parserObject* parser)
3392{
3393 struct colprintLine * line;
3394 unsigned int count;
3395 unsigned int i;
3396
3397 if ((type & LMAP_PATTERN) && (0 < (count = stringListCount (parser->currentPatterns))))
3398 {
3399 for (i = 0; i < count; i++)
3400 {
3401 line = colprintTableGetNewLine (table);
3402 vString *pattern = stringListItem (parser->currentPatterns, i);
3403
/* NOTE(review): the column-filling statements around this condition are
 * missing from this listing. */
3405 if (type & LMAP_EXTENSION)
3408 }
3409 }
3410
3411 if ((type & LMAP_EXTENSION) && (0 < (count = stringListCount (parser->currentExtensions))))
3412 {
3413 for (i = 0; i < count; i++)
3414 {
3415 line = colprintTableGetNewLine (table);
3416 vString *extension = stringListItem (parser->currentExtensions, i);
3417
/* NOTE(review): the column-filling statements around this condition are
 * missing from this listing. */
3419 if (type & LMAP_PATTERN)
3422 }
3423 }
3424}
3425
3426extern void printLanguageMaps (const langType language, langmapType type,
3427 bool withListHeader, bool machinable, FILE *fp)
3428{
3429 /* DON'T SORT THE LIST
3430
3431 The order of listing should be equal to the order of matching
3432 for the parser selection. */
3433
3434 struct colprintTable * table = NULL;
3435 if (type & LMAP_TABLE_OUTPUT)
3436 table = mapColprintTableNew(type);
3437
3438 if (language == LANG_AUTO)
3439 {
3440 for (unsigned int i = 0 ; i < LanguageCount ; ++i)
3441 {
3442 if (!isLanguageVisible (i))
3443 continue;
3444
3445 if (type & LMAP_TABLE_OUTPUT)
3446 {
3447 const parserObject* parser = LanguageTable + i;
3448
3449 mapColprintAddLanguage (table, type, parser);
3450 }
3451 else
3452 printMaps (i, type);
3453 }
3454 }
3455 else
3456 {
3457 Assert (0 <= language && language < (int) LanguageCount);
3458
3459 if (type & LMAP_TABLE_OUTPUT)
3460 {
3461 const parserObject* parser = LanguageTable + language;
3462
3463 mapColprintAddLanguage (table, type, parser);
3464 }
3465 else
3466 printMaps (language, type);
3467 }
3468
3469
3470 if (type & LMAP_TABLE_OUTPUT)
3471 {
3472 colprintTablePrint (table, (language == LANG_AUTO)? 0: 1,
3473 withListHeader, machinable, fp);
3474 colprintTableDelete (table);
3475 }
3476}
3477
3479{
3480 return colprintTableNew ("L:LANGUAGE", "L:ALIAS", NULL);
3481}
3482
/*
 * Add one table line per alias of PARSER to TABLE.  A parser without an
 * alias list, or with an empty one, contributes nothing.
 *
 * NOTE(review): the lines that fill in the columns of each new line
 * (3495, 3496) are missing from this listing.
 */
3483static void aliasColprintAddLanguage (struct colprintTable * table,
3484 const parserObject* parser)
3485{
3486 unsigned int count;
3487
3488 if (parser->currentAliases && (0 < (count = stringListCount (parser->currentAliases))))
3489 {
3490 for (unsigned int i = 0; i < count; i++)
3491 {
3492 struct colprintLine * line = colprintTableGetNewLine (table);
/* NOTE(review): the statement below ends with a stray second ';'
 * (an empty statement); harmless, but worth removing. */
3493 vString *alias = stringListItem (parser->currentAliases, i);;
3494
3497 }
3498 }
3499}
3500
3501extern void printLanguageAliases (const langType language,
3502 bool withListHeader, bool machinable, FILE *fp)
3503{
3504 /* DON'T SORT THE LIST
3505
3506 The order of listing should be equal to the order of matching
3507 for the parser selection. */
3508
3509 struct colprintTable * table = aliasColprintTableNew();
3510 const parserObject* parser;
3511
3512 if (language == LANG_AUTO)
3513 {
3514 for (unsigned int i = 0; i < LanguageCount; ++i)
3515 {
3516 parser = LanguageTable + i;
3517 if (parser->def->invisible)
3518 continue;
3519
3520 aliasColprintAddLanguage (table, parser);
3521 }
3522 }
3523 else
3524 {
3525 Assert (0 <= language && language < (int) LanguageCount);
3526 parser = LanguageTable + language;
3527 aliasColprintAddLanguage (table, parser);
3528 }
3529
3530 colprintTablePrint (table, (language == LANG_AUTO)? 0: 1,
3531 withListHeader, machinable, fp);
3532 colprintTableDelete (table);
3533}
3534
3535static void printLanguage (const langType language, parserDefinition** ltable)
3536{
3537 const parserDefinition* lang;
3538 Assert (0 <= language && language < (int) LanguageCount);
3539 lang = ltable [language];
3540
3541 if (lang->invisible)
3542 return;
3543
3544 if (lang->kindTable != NULL || (lang->method & METHOD_REGEX))
3545 printf ("%s%s\n", lang->name, isLanguageEnabled (lang->id) ? "" : " [disabled]");
3546}
3547
/*
 * Print the list of languages sorted with compareParsersByName.
 *
 * The parser definitions are copied into a temporary array, sorted with
 * qsort(), printed one by one through printLanguage() and the array is
 * released again.
 *
 * NOTE(review): line 3553 is missing from this listing; presumably it
 * allocates ltable for LanguageCount entries.  Confirm.
 */
3548extern void printLanguageList (void)
3549{
3550 unsigned int i;
3551 parserDefinition **ltable;
3552
3554 for (i = 0 ; i < LanguageCount ; ++i)
3555 ltable[i] = LanguageTable[i].def;
3556 qsort (ltable, LanguageCount, sizeof (parserDefinition*), compareParsersByName);
3557
/* Here i is an index into the sorted copy, not a language id. */
3558 for (i = 0 ; i < LanguageCount ; ++i)
3559 printLanguage (i, ltable);
3560
3561 eFree (ltable);
3562}
3563
3565{
3566 eFree ((void *)xdef->name);
3567 eFree ((void *)xdef->description);
3568 eFree (xdef);
3569}
3570
/*
 * Handle the parameter of a "--_extradef-<LANG>" option for LANGUAGE.
 *
 * PARAMETER is parsed as "<name>,<description>", the description part
 * going through extractDescriptionAndFlags(), which also yields an
 * optional flags string.  The new xtagDefinition starts out disabled,
 * without a letter and without an isEnabled callback, and is registered
 * with defineXtag().
 *
 * Malformed input is reported with error (FATAL, ...).  The only return
 * visible in this function returns true.
 *
 * NOTE(review): line 3612 is missing from this listing.
 */
3571static bool processLangDefineExtra (const langType language,
3572 const char *const option,
3573 const char *const parameter)
3574{
3575 xtagDefinition *xdef;
3576 const char * p = parameter;
3577 const char *name_end;
3578 const char *desc;
3579 const char *flags;
3580
3581 Assert (0 <= language && language < (int) LanguageCount);
3582 Assert (p);
3583
3584 if (p[0] == '\0')
3585 error (FATAL, "no extra definition specified in \"--%s\" option", option);
3586
/* The name runs up to the first comma and must not be empty. */
3587 name_end = strchr (p, ',');
3588 if (!name_end)
3589 error (FATAL, "no extra description specified in \"--%s\" option", option);
3590 else if (name_end == p)
3591 error (FATAL, "the extra name in \"--%s\" option is empty", option);
3592
/* NOTE(review): isalnum() receives a plain char here; a char with the
 * high bit set would be passed as a negative value.  Consider casting to
 * unsigned char. */
3593 for (; p < name_end; p++)
3594 {
3595 if (!isalnum (*p))
3596 error (FATAL, "unacceptable char as part of extra name in \"--%s\" option",
3597 option);
3598 }
3599
/* After the loop p == name_end (the comma); step over it. */
3600 p++;
3601 if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
3602 error (FATAL, "extra description in \"--%s\" option is empty", option);
3603
3604 desc = extractDescriptionAndFlags (p, &flags);
3605
3606 xdef = xCalloc (1, xtagDefinition);
3607 xdef->enabled = false;
3608 xdef->letter = NUL_XTAG_LETTER;
3609 xdef->name = eStrndup (parameter, name_end - parameter);
3610 xdef->description = desc;
3611 xdef->isEnabled = NULL;
/* NOTE(review): one assignment line is missing from this listing here. */
3613
/* The flags are evaluated against an empty flag table (NULL, 0). */
3614 if (flags)
3615 flagsEval (flags, NULL, 0, xdef);
3616
3617 defineXtag (xdef, language);
3618
3619 return true;
3620}
3621
3622extern bool processExtradefOption (const char *const option, const char *const parameter)
3623{
3624 langType language;
3625
3626 language = getLanguageComponentInOption (option, "_" "extradef-");
3627 if (language == LANG_IGNORE)
3628 return false;
3629
3630 return processLangDefineExtra (language, option, parameter);
3631}
3632
/* Frees a field definition: its description, its name, then FDEF itself.
 * NOTE(review): the declaration line of this function is missing from this
 * listing; take the name and signature from the real file. */
3634{
3635 eFree ((void *)fdef->description);
3636 eFree ((void *)fdef->name);
3637 eFree (fdef);
3638}
3639
/*
 * Defines a new field for LANGUAGE from PARAMETER.
 *
 * PARAMETER must contain a non-empty alphabetic name, a comma, and a
 * non-empty description; the description part is split from trailing flags
 * by extractDescriptionAndFlags().  OPTION is only used in error messages.
 * Every malformed input is reported with error (FATAL, ...).
 * Returns true whenever it returns.
 *
 * NOTE(review): one line of the body (original line 3683, right after the
 * ftype assignment) is missing from this listing.
 */
3640static bool processLangDefineField (const langType language,
3641 const char *const option,
3642 const char *const parameter)
3643{
3644 fieldDefinition *fdef;
3645 const char * p = parameter;
3646 const char *name_end;
3647 const char *desc;
3648 const char *flags;
3649
3650 Assert (0 <= language && language < (int) LanguageCount);
3651 Assert (p);
3652
3653 if (p[0] == '\0')
3654 error (FATAL, "no field definition specified in \"--%s\" option", option);
3655
/* The name is everything before the first comma. */
3656 name_end = strchr (p, ',');
3657 if (!name_end)
3658 error (FATAL, "no field description specified in \"--%s\" option", option);
3659 else if (name_end == p)
3660 error (FATAL, "the field name in \"--%s\" option is empty", option);
3661
/* Unlike extra names, field names accept letters only (isalpha).
 * NOTE(review): *p is a plain char; a cast to unsigned char would avoid
 * undefined behaviour in isalpha() for bytes >= 0x80. */
3662 for (; p < name_end; p++)
3663 {
3664 if (!isalpha (*p))
3665 error (FATAL, "unacceptable char as part of field name in \"--%s\" option",
3666 option);
3667 }
3668
/* Step over the comma; what follows must be a description, not flags. */
3669 p++;
3670 if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
3671 error (FATAL, "field description in \"--%s\" option is empty", option);
3672
3673 desc = extractDescriptionAndFlags (p, &flags);
3674
/* The new field starts disabled, has no one-letter form and is typed as a
 * string. */
3675 fdef = xCalloc (1, fieldDefinition);
3676 fdef->enabled = false;
3677 fdef->letter = NUL_FIELD_LETTER;
3678 fdef->name = eStrndup(parameter, name_end - parameter);
3679 fdef->description = desc;
3680 fdef->isValueAvailable = NULL;
3681 fdef->dataType = FIELDTYPE_STRING; /* TODO */
3682 fdef->ftype = FIELD_UNKNOWN;
3684
3685 if (flags)
3686 flagsEval (flags, NULL, 0, fdef);
3687
3688 defineField (fdef, language);
3689
3690 return true;
3691}
3692
3693extern bool processFielddefOption (const char *const option, const char *const parameter)
3694{
3695 langType language;
3696
3697 language = getLanguageComponentInOption (option, "_fielddef-");
3698 if (language == LANG_IGNORE)
3699 return false;
3700
3701 return processLangDefineField (language, option, parameter);
3702}
3703
3704/*
3705* File parsing
3706*/
3707
/* Runs one parsing pass of LANGUAGE's parser over the current input.
 * Resets the input file, calls either parser() or parser2 (passCount), then
 * signals the end of input.  Returns RESCAN_NONE unless parser2 asked for a
 * rescan.
 * NOTE(review): the first declaration line and original lines 3717-3719 are
 * missing from this listing. */
3709 const unsigned int passCount)
3710{
3711 parserDefinition *const lang = LanguageTable [language].def;
3712 rescanReason rescan = RESCAN_NONE;
3713
3714 resetInputFile (language);
3715
3716 Assert (lang->parser || lang->parser2);
3717
3720
/* Only parser2 can report a rescan reason; parser() returns nothing. */
3721 if (lang->parser != NULL)
3722 lang->parser ();
3723 else if (lang->parser2 != NULL)
3724 rescan = lang->parser2 (passCount);
3725
3726 notifyInputEnd ();
3727 notifyLanguageRegexInputEnd (language);
3728
3729 return rescan;
3730}
3731
3733{
3735}
3736
3738{
3740}
3741
/* Computes the cork flags needed by PARSER: its own useCork bits, plus
 * CORK_QUEUE when its regexes use scope actions or it requests automatic
 * FQ tags, OR-ed with the flags of every subparser (recursively).
 * NOTE(review): the line declaring `t` inside the loop (original line 3756)
 * is missing from this listing. */
3742static unsigned int parserCorkFlags (parserDefinition *parser)
3743{
3744 subparser *tmp;
3745 unsigned int r = 0;
3746
3747 r |= parser->useCork;
3748
3749 if (hasLanguageScopeActionInRegex (parser->id)
3750 || parser->requestAutomaticFQTag)
3751 r |= CORK_QUEUE;
3752
/* The subparser iteration runs with PARSER pushed as the current language. */
3753 pushLanguage (parser->id);
3754 foreachSubparser(tmp, true)
3755 {
3757 r |= parserCorkFlags (LanguageTable[t].def);
3758 }
3759 popLanguage ();
3760 return r;
3761}
3762
/* Visits every subparser of the current input language, entering and leaving
 * each one in turn.
 * NOTE(review): original lines 3767, 3770 and 3772 (the statements that do
 * the actual setup) are missing from this listing. */
3763static void setupLanguageSubparsersInUse (const langType language)
3764{
3765 subparser *tmp;
3766
3768 foreachSubparser(tmp, true)
3769 {
3771 enterSubparser (tmp);
3773 leaveSubparser ();
3774 }
3775}
3776
/* Visits every subparser, entering and leaving each one in turn.
 * NOTE(review): the declaration line and original lines 3783, 3785 and 3788
 * are missing from this listing, so the per-subparser work is not visible. */
3778{
3779 subparser *tmp;
3780
3781 foreachSubparser(tmp, true)
3782 {
3784 enterSubparser (tmp);
3786 leaveSubparser ();
3787 }
3789}
3790
/* Marks PARSER as used.  On first use, when Option.printTotals > 1 and the
 * parser definition provides initStats, that callback is run first.
 * NOTE(review): the declaration line is missing from this listing. */
3792{
3793 if (Option.printTotals > 1 && parser->used == 0 && parser->def->initStats)
3794 parser->def->initStats (parser->def->id);
3795 parser->used = 1;
3796}
3797
/* Prints the statistics of LANGUAGE's parser to stderr, but only when the
 * parser was used and defines a printStats callback.
 * NOTE(review): the declaration line and original line 3810 are missing from
 * this listing. */
3799{
3800 parserObject *parser = &(LanguageTable [language]);
3801
3802 if (parser->used)
3803 {
3804 if (parser->def->printStats)
3805 {
3806 fprintf(stderr, "\nSTATISTICS of %s\n", getLanguageName (language));
3807 fputs("==============================================\n", stderr);
3808 parser->def->printStats (language);
3809 }
3811 }
3812}
3813
/*
 * Parses the already opened input with LANGUAGE, repeating the pass for as
 * long as the parser requests a rescan.
 *
 * Returns true when a failed pass forced the tag file back to an earlier
 * position.  When EXCLUSIVE_SUBPARSER is non-NULL and `s` is set, the
 * language of that subparser is stored through it.
 *
 * NOTE(review): original lines 3830 and 3881 are missing from this listing;
 * the latter is where `s` is declared.
 */
3814static bool createTagsWithFallback1 (const langType language,
3815 langType *exclusive_subparser)
3816{
3817 bool tagFileResized = false;
3818 unsigned long numTags = numTagsAdded ();
3819 MIOPos tagfpos;
3820 int lastPromise = getLastPromise ();
3821 unsigned int passCount = 0;
3822 rescanReason whyRescan;
3823 parserObject *parser;
3824 unsigned int corkFlags;
3825 bool useCork = false;
3826
3827 initializeParser (language);
3828 parser = &(LanguageTable [language]);
3829
3831
/* Cork the tag file only when the parser or a subparser needs the queue. */
3832 corkFlags = parserCorkFlags (parser->def);
3833 useCork = corkFlags & CORK_QUEUE;
3834 if (useCork)
3835 corkTagFile(corkFlags);
3836
3837 addParserPseudoTags (language);
3838 initializeParserStats (parser);
/* Remember where this file's tags begin so a failed pass can be undone. */
3839 tagFilePosition (&tagfpos);
3840
3841 anonResetMaybe (parser);
3842
3843 while ( ( whyRescan =
3844 createTagsForFile (language, ++passCount) )
3845 != RESCAN_NONE)
3846 {
/* Between passes the cork is released and taken again. */
3847 if (useCork)
3848 {
3849 uncorkTagFile();
3850 corkTagFile(corkFlags);
3851 }
3852
3853
3854 if (whyRescan == RESCAN_FAILED)
3855 {
3856 /* Restore prior state of tag file.
3857 */
3858 setTagFilePosition (&tagfpos);
3859 setNumTagsAdded (numTags);
3860 writerRescanFailed (numTags);
3861 tagFileResized = true;
3862 breakPromisesAfter(lastPromise);
3863 }
3864 else if (whyRescan == RESCAN_APPEND)
3865 {
/* Keep what was written so far: move the restore point forward. */
3866 tagFilePosition (&tagfpos);
3867 numTags = numTagsAdded ();
3868 lastPromise = getLastPromise ();
3869 }
3870 }
3871
3872 /* Force filling allLines buffer and kick the multiline regex parser */
3873 if (hasLanguageMultilineRegexPatterns (language))
3874 while (readLineFromInputFile () != NULL)
3875 ; /* Do nothing */
3876
3877 if (useCork)
3878 uncorkTagFile();
3879
3880 {
3882 if (exclusive_subparser && s)
3883 *exclusive_subparser = getSubparserLanguage (s);
3884 }
3885
3886 return tagFileResized;
3887}
3888
/* Runs LANGUAGE's parser through createTagsWithFallback1() for the area
 * described by the start/end line and offset arguments, logging those
 * arguments via verbose() first.  Returns the result of
 * createTagsWithFallback1().
 * NOTE(review): original lines 3903, 3907 and 3913 are missing from this
 * listing; the calls that narrow the input stream and presumably restore it
 * afterwards are therefore not visible. */
3889extern bool runParserInNarrowedInputStream (const langType language,
3890 unsigned long startLine, long startCharOffset,
3891 unsigned long endLine, long endCharOffset,
3892 unsigned long sourceLineOffset,
3893 int promise)
3894{
3895 bool tagFileResized;
3896
3897 verbose ("runParserInNarrowedInputStream: %s; "
3898 "file: %s, "
3899 "start(line: %lu, offset: %ld, srcline: %lu)"
3900 " - "
3901 "end(line: %lu, offset: %ld)\n",
3902 getLanguageName (language),
3904 startLine, startCharOffset, sourceLineOffset,
3905 endLine, endCharOffset);
3906
3908 startLine, startCharOffset,
3909 endLine, endCharOffset,
3910 sourceLineOffset,
3911 promise);
/* No exclusive-subparser report is requested for a narrowed run. */
3912 tagFileResized = createTagsWithFallback1 (language, NULL);
3914 return tagFileResized;
3915
3916}
3917
/* Opens FILENAME (or MIO) as input for LANGUAGE, generates its tags, forces
 * pending promises, emits the file tag and closes the input.
 * *failureInOpenning is set to true and false is returned when the input
 * cannot be opened; otherwise it is set to false and the return value tells
 * whether the tag file was resized.
 * NOTE(review): the first declaration line (original line 3918) is missing
 * from this listing. */
3919 const char *const fileName, const langType language,
3920 MIO *mio, bool *failureInOpenning)
3921{
3922 langType exclusive_subparser = LANG_IGNORE;
3923 bool tagFileResized = false;
3924
3925 Assert (0 <= language && language < (int) LanguageCount);
3926
3927 if (!openInputFile (fileName, language, mio))
3928 {
3929 *failureInOpenning = true;
3930 return false;
3931 }
3932 *failureInOpenning = false;
3933
3934 tagFileResized = createTagsWithFallback1 (language,
3935 &exclusive_subparser);
3936 tagFileResized = forcePromises()? true: tagFileResized;
3937
/* The file tag is made under the exclusive subparser's language when one
 * was reported, otherwise under LANGUAGE itself. */
3938 pushLanguage ((exclusive_subparser == LANG_IGNORE)
3939 ? language
3940 : exclusive_subparser);
3941 makeFileTag (fileName);
3942 popLanguage ();
3943 closeInputFile ();
3944
3945 return tagFileResized;
3946}
3947
3948static void printGuessedParser (const char* const fileName, langType language)
3949{
3950 const char *parserName;
3951
3952 if (language == LANG_IGNORE)
3953 {
3954 Option.printLanguage = ((int)true) + 1;
3955 parserName = RSV_NONE;
3956 }
3957 else
3958 {
3959 parserName = getLanguageName (language);
3960 }
3961
3962 printf("%s: %s\n", fileName, parserName);
3963}
3964
3965#ifdef HAVE_ICONV
3966static char **EncodingMap;
3967static unsigned int EncodingMapMax;
3968
3969static void addLanguageEncoding (const langType language,
3970 const char *const encoding)
3971{
3972 if (language > EncodingMapMax || EncodingMapMax == 0)
3973 {
3974 int i;
3975 int istart = (EncodingMapMax == 0)? 0: EncodingMapMax + 1;
3976 EncodingMap = xRealloc (EncodingMap, (language + 1), char*);
3977 for (i = istart; i <= language ; ++i)
3978 {
3979 EncodingMap [i] = NULL;
3980 }
3981 EncodingMapMax = language;
3982 }
3983 if (EncodingMap [language])
3984 eFree (EncodingMap [language]);
3985 EncodingMap [language] = eStrdup(encoding);
3986 if (!Option.outputEncoding)
3987 Option.outputEncoding = eStrdup("UTF-8");
3988}
3989
3990extern bool processLanguageEncodingOption (const char *const option, const char *const parameter)
3991{
3992 langType language;
3993
3994 language = getLanguageComponentInOption (option, "input-encoding-");
3995 if (language == LANG_IGNORE)
3996 return false;
3997
3998 addLanguageEncoding (language, parameter);
3999 return true;
4000}
4001
4002extern void freeEncodingResources (void)
4003{
4004 if (EncodingMap)
4005 {
4006 unsigned int i;
4007 for (i = 0 ; i <= EncodingMapMax ; ++i)
4008 {
4009 if (EncodingMap [i])
4010 eFree (EncodingMap [i]);
4011 }
4012 eFree (EncodingMap);
4013 }
4014 if (Option.inputEncoding)
4015 eFree (Option.inputEncoding);
4016 if (Option.outputEncoding)
4017 eFree (Option.outputEncoding);
4018}
4019
4020extern const char *getLanguageEncoding (const langType language)
4021{
4022 if (EncodingMap && language <= EncodingMapMax && EncodingMap [language])
4023 return EncodingMap[language];
4024 else
4025 return Option.inputEncoding;
4026}
4027#endif
4028
4029static void addParserPseudoTags (langType language)
4030{
4031 parserObject *parser = LanguageTable + language;
4032 if (!parser->pseudoTagPrinted)
4033 {
4034 for (int i = 0; i < PTAG_COUNT; i++)
4035 {
4036 if (isPtagParserSpecific (i))
4037 makePtagIfEnabled (i, language, parser);
4038 }
4039 parser->pseudoTagPrinted = 1;
4040 }
4041}
4042
4043extern bool doesParserRequireMemoryStream (const langType language)
4044{
4045 Assert (0 <= language && language < (int) LanguageCount);
4046 parserDefinition *const lang = LanguageTable [language].def;
4047 unsigned int i;
4048
4049 if (lang->tagXpathTableCount > 0
4050 || lang->useMemoryStreamInput)
4051 {
4052 verbose ("%s requires a memory stream for input\n", lang->name);
4053 return true;
4054 }
4055
4056 for (i = 0; i < lang->dependencyCount; i++)
4057 {
4058 parserDependency *d = lang->dependencies + i;
4059 if (d->type == DEPTYPE_SUBPARSER &&
4060 ((subparser *)(d->data))->direction & SUBPARSER_SUB_RUNS_BASE)
4061 {
4062 langType baseParser;
4063 baseParser = getNamedLanguage (d->upperParser, 0);
4064 if (doesParserRequireMemoryStream(baseParser))
4065 {
4066 verbose ("%s/%s requires a memory stream for input\n", lang->name,
4067 LanguageTable[baseParser].def->name);
4068 return true;
4069 }
4070 }
4071 }
4072
4073 return false;
4074}
4075
/*
 * Parses FILENAME by calling parseFileWithMio() with no MIO and no client
 * data, bracketed by trace enter/leave records.  Returns that call's result.
 */
extern bool parseFile (const char *const fileName)
{
	TRACE_ENTER_TEXT("Parsing file %s",fileName);

	const bool resized = parseFileWithMio (fileName, NULL, NULL);

	TRACE_LEAVE();
	return resized;
}
4083
/* Generates tags for FILENAME/MIO with LANGUAGE between writer and anonymous
 * name setup and teardown.  CLIENTDATA is handed to setupWriter().
 * NOTE(review): original lines 4094 and 4098 are missing from this listing. */
4084static bool parseMio (const char *const fileName, langType language, MIO* mio, bool useSourceFileTagPath,
4085 void *clientData)
4086{
4087 bool tagFileResized = false;
4088 bool failureInOpenning = false;
4089
4090 setupWriter (clientData);
4091
4092 setupAnon ();
4093
4095
4096 tagFileResized = createTagsWithFallback (fileName, language, mio, &failureInOpenning);
4097
4099
4100 teardownAnon ();
4101
/* NOTE(review): the else branch returns only the teardownWriter() result, so
 * tagFileResized is not reported on that path - confirm this is intended. */
4102 if (useSourceFileTagPath && (!failureInOpenning))
4103 return teardownWriter (getSourceFileTagPath())? true: tagFileResized;
4104 else
4105 return teardownWriter(fileName);
4106}
4107
/* Determines the language of FILENAME (optionally backed by MIO) and, unless
 * the language is LANG_IGNORE, opens the tag file, parses the input and
 * closes the tag file again.  Returns whether the tag file was resized.
 * NOTE(review): original lines 4114, 4122, 4135 and 4143 are missing from
 * this listing, including the condition that guards the
 * printGuessedParser() branch. */
4108extern bool parseFileWithMio (const char *const fileName, MIO *mio,
4109 void *clientData)
4110{
4111 bool tagFileResized = false;
4112 langType language;
4113 struct GetLanguageRequest req = {
4115 .fileName = fileName,
4116 .mio = mio,
4117 };
4118
4119 language = getFileLanguageForRequest (&req);
4120 Assert (language != LANG_AUTO);
4121
4123 {
4124 printGuessedParser (fileName, language);
4125 return tagFileResized;
4126 }
4127
4128 if (language == LANG_IGNORE)
4129 verbose ("ignoring %s (unknown language/language disabled)\n",
4130 fileName);
4131 else
4132 {
4133 Assert(isLanguageEnabled (language));
4134
4136 openTagFile ();
4137
4138#ifdef HAVE_ICONV
4139 /* TODO: checkUTF8BOM can be used to update the encodings. */
4140 openConverter (getLanguageEncoding (language), Option.outputEncoding);
4141#endif
4142 tagFileResized = parseMio (fileName, language, req.mio, true, clientData);
4144 closeTagFile (tagFileResized);
4145 addTotals (1, 0L, 0L);
4146
4147#ifdef HAVE_ICONV
4148 closeConverter ();
4149#endif
4150 }
4151
/* The MIO is released here only for GLR_OPEN requests that hold one. */
4152 if (req.type == GLR_OPEN && req.mio)
4153 mio_unref (req.mio);
4154
4155 return tagFileResized;
4156}
4157
4158extern bool parseRawBuffer(const char *fileName, unsigned char *buffer,
4159 size_t bufferSize, const langType language, void *clientData)
4160{
4161 MIO *mio = NULL;
4162 bool r;
4163
4164 if (buffer)
4165 mio = mio_new_memory (buffer, bufferSize, NULL, NULL);
4166
4167 r = parseMio (fileName, language, mio, false, clientData);
4168
4169 if (buffer)
4170 mio_unref (mio);
4171
4172 return r;
4173}
4174
/* Applies FUNC to LANGUAGE's regex control block and ALLLINES, then repeats
 * recursively for every subparser.
 * NOTE(review): the first declaration line and the line declaring `t`
 * (original line 4184) are missing from this listing. */
4176 bool (* func) (struct lregexControlBlock *, const vString* const),
4177 const vString* const allLines)
4178{
4179 subparser *tmp;
4180
4181 func ((LanguageTable + language)->lregexControlBlock, allLines);
4182 foreachSubparser(tmp, true)
4183 {
4185 enterSubparser (tmp);
4186 matchLanguageMultilineRegexCommon (t, func, allLines);
4187 leaveSubparser ();
4188 }
4189}
4190
/* NOTE(review): the single body statement (original line 4194) is missing
 * from this listing, so what this function calls cannot be stated here. */
4191extern void matchLanguageMultilineRegex (const langType language,
4192 const vString* const allLines)
4193{
4195}
4196
/* NOTE(review): the single body statement (original line 4200) is missing
 * from this listing, so what this function calls cannot be stated here. */
4197extern void matchLanguageMultitableRegex (const langType language,
4198 const vString* const allLines)
4199{
4201}
4202
4203extern void processLanguageMultitableExtendingOption (langType language, const char *const parameter)
4204{
4205 const char* src;
4206 char* dist;
4207 const char *tmp;
4208
4209 tmp = strchr(parameter, '+');
4210
4211 if (!tmp)
4212 error (FATAL, "no separator(+) found: %s", parameter);
4213
4214 if (tmp == parameter)
4215 error (FATAL, "the name of source table is empty in table extending: %s", parameter);
4216
4217 src = tmp + 1;
4218 if (!*src)
4219 error (FATAL, "the name of dist table is empty in table extending: %s", parameter);
4220
4221 dist = eStrndup(parameter, tmp - parameter);
4222 extendRegexTable(((LanguageTable + language)->lregexControlBlock), src, dist);
4223 eFree (dist);
4224}
4225
/* Returns true when PREDICATE holds for the regex control block of LANGUAGE
 * or, failing that, for any of its subparsers (searched recursively; the
 * search stops at the first hit).
 * NOTE(review): the line declaring `t` (original line 4236) is missing from
 * this listing. */
4226static bool lregexQueryParserAndSubparsers (const langType language, bool (* predicate) (struct lregexControlBlock *))
4227{
4228 bool r;
4229 subparser *tmp;
4230
4231 r = predicate ((LanguageTable + language)->lregexControlBlock);
4232 if (!r)
4233 {
4234 foreachSubparser(tmp, true)
4235 {
4237 enterSubparser (tmp);
4238 r = lregexQueryParserAndSubparsers (t, predicate);
4239 leaveSubparser ();
4240
4241 if (r)
4242 break;
4243 }
4244 }
4245
4246 return r;
4247}
4248
4250{
4252}
4253
4254
4255extern void addLanguageCallbackRegex (const langType language, const char *const regex, const char *const flags,
4256 const regexCallback callback, bool *disabled, void *userData)
4257{
4258 addCallbackRegex ((LanguageTable +language)->lregexControlBlock, regex, flags, callback, disabled, userData);
4259}
4260
4261extern bool hasLanguageScopeActionInRegex (const langType language)
4262{
4263 bool hasScopeAction;
4264
4265 pushLanguage (language);
4266 hasScopeAction = lregexQueryParserAndSubparsers (language, hasScopeActionInRegex);
4267 popLanguage ();
4268
4269 return hasScopeAction;
4270}
4271
/* Visits every subparser of the current input language, entering and leaving
 * each one in turn.
 * NOTE(review): original lines 4276, 4279 and 4281 (the statements that
 * perform the matching against LINE) are missing from this listing. */
4272extern void matchLanguageRegex (const langType language, const vString* const line)
4273{
4274 subparser *tmp;
4275
4277 foreachSubparser(tmp, true)
4278 {
4280 enterSubparser (tmp);
4282 leaveSubparser ();
4283 }
4284}
4285
/* NOTE(review): the first declaration line and the start of the call in the
 * body (original lines 4286 and 4290) are missing from this listing.  The
 * visible part passes REGPTYPE and PARAMETER on and always returns true. */
4287 enum regexParserType regptype,
4288 const char *const parameter)
4289{
4291 regptype, parameter);
4292
4293 return true;
4294}
4295
4296extern bool processTabledefOption (const char *const option, const char *const parameter)
4297{
4298 langType language;
4299
4300 language = getLanguageComponentInOption (option, "_tabledef-");
4301 if (language == LANG_IGNORE)
4302 return false;
4303
4304 if (parameter == NULL || parameter[0] == '\0')
4305 error (FATAL, "A parameter is needed after \"%s\" option", option);
4306
4307 addRegexTable((LanguageTable +language)->lregexControlBlock, parameter);
4308 return true;
4309}
4310
4311extern void useRegexMethod (const langType language)
4312{
4313 parserDefinition* lang;
4314
4315 Assert (0 <= language && language < (int) LanguageCount);
4316 lang = LanguageTable [language].def;
4317 lang->method |= METHOD_REGEX;
4318}
4319
4320static void useXpathMethod (const langType language)
4321{
4322 parserDefinition* lang;
4323
4324 Assert (0 <= language && language < (int) LanguageCount);
4325 lang = LanguageTable [language].def;
4326 lang->method |= METHOD_XPATH;
4327}
4328
/* Installs every entry of the parser's static tagRegexTable, choosing
 * between two registration calls depending on the entry's mline flag.
 * NOTE(review): the first line of each of the two calls (original lines 4345
 * and 4352) is missing from this listing, so the callee names are not
 * visible. */
4329static void installTagRegexTable (const langType language)
4330{
4331 parserObject* parser;
4332 parserDefinition* lang;
4333 unsigned int i;
4334
4335 Assert (0 <= language && language < (int) LanguageCount);
4336 parser = LanguageTable + language;
4337 lang = parser->def;
4338
4339
4340 if (lang->tagRegexTable != NULL)
4341 {
4342 for (i = 0; i < lang->tagRegexCount; ++i)
4343 {
4344 if (lang->tagRegexTable [i].mline)
4346 lang->tagRegexTable [i].regex,
4347 lang->tagRegexTable [i].name,
4348 lang->tagRegexTable [i].kinds,
4349 lang->tagRegexTable [i].flags,
4350 (lang->tagRegexTable [i].disabled));
4351 else
4353 lang->tagRegexTable [i].regex,
4354 lang->tagRegexTable [i].name,
4355 lang->tagRegexTable [i].kinds,
4356 lang->tagRegexTable [i].flags,
4357 (lang->tagRegexTable [i].disabled));
4358 }
4359 }
4360}
4361
4362static void installKeywordTable (const langType language)
4363{
4364 parserDefinition* lang;
4365 unsigned int i;
4366
4367 Assert (0 <= language && language < (int) LanguageCount);
4368 lang = LanguageTable [language].def;
4369
4370 if (lang->keywordTable != NULL)
4371 {
4372 for (i = 0; i < lang->keywordCount; ++i)
4373 addKeyword (lang->keywordTable [i].name,
4374 language,
4375 lang->keywordTable [i].id);
4376 }
4377}
4378
4379static void installTagXpathTable (const langType language)
4380{
4381 parserDefinition* lang;
4382 unsigned int i, j;
4383
4384 Assert (0 <= language && language < (int) LanguageCount);
4385 lang = LanguageTable [language].def;
4386
4387 if (lang->tagXpathTableTable != NULL)
4388 {
4389 for (i = 0; i < lang->tagXpathTableCount; ++i)
4390 for (j = 0; j < lang->tagXpathTableTable[i].count; ++j)
4391 addTagXpath (language, lang->tagXpathTableTable[i].table + j);
4392 useXpathMethod (language);
4393 }
4394}
4395
4396static void uninstallTagXpathTable (const langType language)
4397{
4398 parserDefinition* lang;
4399 unsigned int i, j;
4400
4401 Assert (0 <= language && language < (int) LanguageCount);
4402 lang = LanguageTable [language].def;
4403
4404 if (lang->tagXpathTableTable != NULL)
4405 {
4406 for (i = 0; i < lang->tagXpathTableCount; ++i)
4407 for (j = 0; j < lang->tagXpathTableTable[i].count; ++j)
4408 removeTagXpath (language, lang->tagXpathTableTable[i].table + j);
4409 }
4410}
4411
4412const tagXpathTableTable *getXpathTableTable (const langType language, unsigned int nth)
4413{
4414 parserDefinition* lang;
4415
4416 Assert (0 <= language && language < (int) LanguageCount);
4417 lang = LanguageTable [language].def;
4418
4419 Assert (nth < lang->tagXpathTableCount);
4420 return lang->tagXpathTableTable + nth;
4421}
4422
4423extern unsigned int getXpathFileSpecCount (const langType language)
4424{
4425 parserDefinition* lang;
4426
4427 Assert (0 <= language && language < (int) LanguageCount);
4428 lang = LanguageTable [language].def;
4429
4430 return lang->xpathFileSpecCount;
4431}
4432
4433extern xpathFileSpec* getXpathFileSpec (const langType language, unsigned int nth)
4434{
4435 parserDefinition* lang;
4436
4437 Assert (0 <= language && language < (int) LanguageCount);
4438 lang = LanguageTable [language].def;
4439
4440 Assert (nth < lang->xpathFileSpecCount);
4441 return lang->xpathFileSpecs + nth;
4442}
4443
/* Writes one pseudo tag per scope separator of every kind of LANGUAGE.
 * The tag's NAME argument is a one or two letter string built from the kind
 * letters involved; the separator text is escaped before being written.
 * Returns true when at least one writePseudoTag() call returned true.
 * NOTE(review): original lines 4450, 4477 and 4479 are missing from this
 * listing: the declaration of `kind`, the condition of the first branch and
 * its assignment to name[0]. */
4444extern bool makeKindSeparatorsPseudoTags (const langType language,
4445 const ptagDesc *pdesc)
4446{
4447 parserObject* parser;
4448 parserDefinition* lang;
4449 struct kindControlBlock *kcb;
4451 unsigned int kindCount;
4452 unsigned int i, j;
4453
4454 bool r = false;
4455
4456 Assert (0 <= language && language < (int) LanguageCount);
4457 parser = LanguageTable + language;
4458 lang = parser->def;
4459 kcb = parser->kindControlBlock;
4460 kindCount = countKinds(kcb);
4461
4462 if (kindCount == 0)
4463 return r;
4464
/* One scratch buffer is reused for every escaped separator. */
4465 vString *sepval = vStringNew();
4466 for (i = 0; i < kindCount; ++i)
4467 {
4468 kind = getKind (kcb, i);
4469 for (j = 0; j < kind->separatorCount; ++j)
4470 {
4471 char name[3] = {[1] = '\0', [2] = '\0'};
4472 const kindDefinition *upperKind;
4473 const scopeSeparator *sep;
4474
4475 sep = kind->separators + j;
4476
4478 {
4480 name[1] = kind->letter;
4481 }
4482 else if (sep->parentKindIndex == KIND_GHOST_INDEX)
4483 {
4484 /* This is root separator: no upper item is here. */
4485 name[0] = kind->letter;
4486 }
4487 else
4488 {
/* Separators whose parent kind cannot be resolved are skipped. */
4489 upperKind = getLanguageKind (language,
4490 sep->parentKindIndex);
4491 if (!upperKind)
4492 continue;
4493
4494 name[0] = upperKind->letter;
4495 name[1] = kind->letter;
4496 }
4497
4498
4499 vStringClear (sepval);
4500 vStringCatSWithEscaping (sepval, sep->separator);
4501
4502 r = writePseudoTag (pdesc, vStringValue (sepval),
4503 name, lang->name) || r;
4504 }
4505 }
4506 vStringDelete (sepval);
4507
4508 return r;
4509}
4510
/* NOTE(review): the opening line of this struct and two of its members
 * (original lines 4511, 4513 and 4514) are missing from this listing.  Code
 * below accesses langName, pdesc and written on
 * struct makeKindDescriptionPseudoTagData. */
4512 const char* langName;
4515};
4516
/* Writes one pseudo tag describing KIND: the tag name is "<letter>,<name>"
 * and the value is the kind's description (or its name when it has none),
 * escaped as a pattern.  USER_DATA points to a
 * struct makeKindDescriptionPseudoTagData whose `written` member is OR-ed
 * with the writePseudoTag() result.  Always returns false.
 * NOTE(review): the first declaration line (original line 4517) is missing
 * from this listing. */
4518 void *user_data)
4519{
4520 struct makeKindDescriptionPseudoTagData *data = user_data;
4521 vString *letter_and_name;
4522 vString *description;
4523 const char *d;
4524
4525 letter_and_name = vStringNew ();
4526 description = vStringNew ();
4527
4528 vStringPut (letter_and_name, kind -> letter);
4529 vStringPut (letter_and_name, ',');
4530 vStringCatS (letter_and_name, kind -> name);
4531
4532 d = kind->description? kind->description: kind->name;
4533 vStringCatSWithEscapingAsPattern (description, d);
4534 data->written |= writePseudoTag (data->pdesc, vStringValue (letter_and_name),
4535 vStringValue (description),
4536 data->langName);
4537
4538 vStringDelete (description);
4539 vStringDelete (letter_and_name);
4540
4541 return false;
4542}
4543
/* Writes a description pseudo tag for every enabled kind of LANGUAGE and
 * returns whether any of them was written.
 * NOTE(review): the declarations of `kind` and `data` (original lines 4550
 * and 4552) are missing from this listing. */
4544extern bool makeKindDescriptionsPseudoTags (const langType language,
4545 const ptagDesc *pdesc)
4546{
4547 parserObject *parser;
4548 struct kindControlBlock *kcb;
4549 parserDefinition* lang;
4551 unsigned int kindCount, i;
4553
4554 Assert (0 <= language && language < (int) LanguageCount);
4555 parser = LanguageTable + language;
4556 kcb = parser->kindControlBlock;
4557 lang = parser->def;
4558
4559 kindCount = countKinds(kcb);
4560
4561 data.langName = lang->name;
4562 data.pdesc = pdesc;
4563 data.written = false;
4564
4565 for (i = 0; i < kindCount; ++i)
4566 {
4567 if (!isLanguageKindEnabled (language, i))
4568 continue;
4569
4570 kind = getKind (kcb, i);
4571 makeKindDescriptionPseudoTag (kind, &data);
4572 }
4573
4574 return data.written;
4575}
4576
/* Writes the description pseudo tag of field F.  Fields without a name are
 * skipped (returns false).  The language name passed to writePseudoTag() is
 * NULL when LANGUAGE is LANG_IGNORE.  Returns the writePseudoTag() result.
 * NOTE(review): the statement that fills `description` (original lines
 * 4588-4589) is missing from this listing. */
4577static bool makeFieldDescriptionPseudoTag (const langType language,
4578 fieldType f,
4579 const ptagDesc *pdesc)
4580{
4581 vString *description;
4582 const char *name = getFieldName (f);
4583
4584 if (name == NULL || name [0] == '\0')
4585 return false;
4586
4587 description = vStringNew ();
4590
4591 bool r = writePseudoTag (pdesc, name,
4592 vStringValue (description),
4593 language == LANG_IGNORE? NULL: getLanguageName (language));
4594
4595 vStringDelete (description);
4596 return r;
4597}
4598
4599extern bool makeFieldDescriptionsPseudoTags (const langType language,
4600 const ptagDesc *pdesc)
4601{
4602 bool written = false;
4603 for (int i = 0; i < countFields (); i++)
4604 {
4605 if (getFieldOwner (i) == language
4606 && isFieldEnabled (i))
4607 {
4608 if (makeFieldDescriptionPseudoTag (language, i, pdesc))
4609 written = true;
4610 }
4611 }
4612 return written;
4613}
4614
/* Writes the description pseudo tag of extra X.  Extras without a name are
 * skipped (returns false).  The language name passed to writePseudoTag() is
 * NULL when LANGUAGE is LANG_IGNORE.  Returns the writePseudoTag() result.
 * NOTE(review): the first line of the call that fills `description` from
 * getXtagDescription (x) (original line 4626) is missing from this listing. */
4615static bool makeExtraDescriptionPseudoTag (const langType language,
4616 xtagType x,
4617 const ptagDesc *pdesc)
4618{
4619 vString *description;
4620 const char *name = getXtagName (x);
4621
4622 if (name == NULL || name [0] == '\0')
4623 return false;
4624
4625 description = vStringNew ();
4627 getXtagDescription (x));
4628
4629 bool r = writePseudoTag (pdesc, name,
4630 vStringValue (description),
4631 language == LANG_IGNORE? NULL: getLanguageName (language));
4632
4633 vStringDelete (description);
4634 return r;
4635}
4636
4637extern bool makeExtraDescriptionsPseudoTags (const langType language,
4638 const ptagDesc *pdesc)
4639{
4640 bool written = false;
4641 for (int i = 0; i < countXtags (); i++)
4642 {
4643 if (getXtagOwner (i) == language
4644 && isXtagEnabled (i))
4645 {
4646 if (makeExtraDescriptionPseudoTag (language, i, pdesc))
4647 written = true;
4648 }
4649 }
4650 return written;
4651}
4652
4653/*
4654* Copyright (c) 2016, Szymon Tomasz Stefanek
4655*
4656* This source code is released for free distribution under the terms of the
4657* GNU General Public License version 2 or (at your option) any later version.
4658*
4659* Anonymous name generator
4660*/
4662
/* NOTE(review): the body statement (original line 4665) is missing from this
 * listing.  parseMio() calls this before generating tags. */
4663static void setupAnon (void)
4664{
4666}
4667
/* NOTE(review): the body statement (original line 4670) is missing from this
 * listing.  parseMio() calls this after generating tags. */
4668static void teardownAnon (void)
4669{
4671}
4672
/* Resets PARSER's anonymous identifier counter to 0 unless an early-return
 * condition holds.
 * NOTE(review): that condition (original line 4675) and the statement after
 * the reset (original line 4679) are missing from this listing. */
4673static void anonResetMaybe (parserObject *parser)
4674{
4676 return;
4677
4678 parser -> anonymousIdentiferId = 0;
4680}
4681
/*
 * Hashes the NUL-terminated byte string STR: the hash starts at 5381 and,
 * for each byte, becomes hash * 33 + byte (unsigned int arithmetic, so it
 * wraps on overflow).
 */
static unsigned int anonHash(const unsigned char *str)
{
	unsigned int hash = 5381;

	for (const unsigned char *cursor = str; *cursor != '\0'; ++cursor)
		hash = hash * 33u + *cursor;

	return hash;
}
4692
/*
 * Writes the hash of FILENAME into BUF as zero-padded lowercase hex.
 *
 * BUF must provide 9 bytes.  The write is bounded to that size: "%08x" only
 * sets a minimum width, so an unbounded sprintf() could overrun BUF on a
 * platform where unsigned int is wider than 32 bits.
 */
extern void anonHashString (const char *filename, char buf[9])
{
	snprintf(buf, 9, "%08x", anonHash((const unsigned char *)filename));
}
4697
4698
/* Fills BUFFER with PREFIX followed by the parser's anonymous identifier
 * counter, which is incremented first.  KIND is only used by the disabled
 * (#if 0) variant.
 * NOTE(review): the line declaring `parser` (original line 4701) and one
 * line of the disabled branch (original line 4710) are missing from this
 * listing. */
4699extern void anonGenerate (vString *buffer, const char *prefix, int kind)
4700{
4702 parser -> anonymousIdentiferId ++;
4703
4704 char szNum[32];
4705#if 0
4706 char buf [9];
4707
4708 vStringCopyS(buffer, prefix);
4709
4711 sprintf(szNum,"%s%02x%02x",buf,parser -> anonymousIdentiferId, kind);
4712#else
4713 /* we want to see numbers for anon functions in the tree view instead of the hash */
4714 vStringCopyS(buffer, prefix);
4715 sprintf(szNum,"%u", parser -> anonymousIdentiferId);
4716#endif
4717 vStringCatS(buffer,szNum);
4718}
4719
4720extern vString *anonGenerateNew (const char *prefix, int kind)
4721{
4722 vString *buffer = vStringNew ();
4723
4724 anonGenerate (buffer, prefix, kind);
4725 return buffer;
4726}
4727
4728
4729extern void applyParameter (const langType language, const char *name, const char *args)
4730{
4731 parserDefinition* parser;
4732
4733
4734 Assert (0 <= language && language < (int) LanguageCount);
4735
4736 initializeParserOne (language);
4737 parser = LanguageTable [language].def;
4738
4739 if (parser->parameterHandlerTable)
4740 {
4741 unsigned int i;
4742
4743 for (i = 0; i < parser->parameterHandlerCount; i++)
4744 {
4745 if (strcmp (parser->parameterHandlerTable [i].name, name) == 0)
4746 {
4747 parser->parameterHandlerTable [i].handleParameter (language, name, args);
4748 return;
4749 }
4750 }
4751 }
4752
4753 error (FATAL, "no such parameter in %s: %s", parser->name, name);
4754}
4755
/* Returns the subparser following LAST for the current input language, or
 * NULL at the end.  Subparsers whose language is disabled are skipped, and
 * so are those flagged METHOD_NOT_CRAFTED unless includingNoneCraftedParser
 * is true.
 * NOTE(review): the first declaration line and the assignment used when
 * LAST is NULL (original lines 4756 and 4765) are missing from this
 * listing. */
4757 bool includingNoneCraftedParser)
4758{
4759 langType lang = getInputLanguage ();
4760 parserObject *parser = LanguageTable + lang;
4761 subparser *r;
4762 langType t;
4763
4764 if (last == NULL)
4766 else
4767 r = last->next;
4768
4769 if (r == NULL)
4770 return r;
4771
4772 t = getSubparserLanguage(r);
4773 if (isLanguageEnabled (t) &&
4774 (includingNoneCraftedParser
4775 || ((((LanguageTable + t)->def->method) & METHOD_NOT_CRAFTED) == 0)))
4776 return r;
4777 else
/* Not acceptable: continue the search from this candidate. */
4778 return getNextSubparser (r, includingNoneCraftedParser);
4779}
4780
/* Returns the slave parser following LAST for the current input language.
 * NOTE(review): the declaration line and the assignment used when LAST is
 * NULL (original lines 4781 and 4788) are missing from this listing. */
4782{
4783 langType lang = getInputLanguage ();
4784 parserObject *parser = LanguageTable + lang;
4785 slaveParser *r;
4786
4787 if (last == NULL)
4789 else
4790 r = last->next;
4791
4792 return r;
4793}
4794
/* Schedules the base parser of the current input language to run: picks a
 * dependency (the first DEPTYPE_SUBPARSER one for RUN_DEFAULT_SUBPARSERS,
 * otherwise the one at DEPENDENCYINDEX), force-enables the base parser if
 * it is disabled, logs its subparsers and makes a promise for it.
 * NOTE(review): original lines 4821, 4823 and 4840 are missing from this
 * listing: the two calls selecting subparsers and the declaration of `t`. */
4795extern void scheduleRunningBaseparser (int dependencyIndex)
4796{
4797 langType current = getInputLanguage ();
4798 parserDefinition *current_parser = LanguageTable [current].def;
4799 parserDependency *dep = NULL;
4800
4801 if (dependencyIndex == RUN_DEFAULT_SUBPARSERS)
4802 {
4803 for (unsigned int i = 0; i < current_parser->dependencyCount; ++i)
4804 if (current_parser->dependencies[i].type == DEPTYPE_SUBPARSER)
4805 {
4806 dep = current_parser->dependencies + i;
4807 break;
4808 }
4809 }
4810 else
/* NOTE(review): dependencyIndex is not range-checked here. */
4811 dep = current_parser->dependencies + dependencyIndex;
4812
4813 if (dep == NULL)
4814 return;
4815
/* NOTE(review): the result of getNamedLanguage() is used as a table index
 * without checking for LANG_IGNORE. */
4816 const char *base_name = dep->upperParser;
4817 langType base = getNamedLanguage (base_name, 0);
4818 parserObject *base_parser = LanguageTable + base;
4819
4820 if (dependencyIndex == RUN_DEFAULT_SUBPARSERS)
4822 else
4824 dep->data);
4825
/* A base parser enabled only for this purpose is marked dontEmit. */
4826 if (!isLanguageEnabled (base))
4827 {
4828 enableLanguage (base, true);
4829 base_parser->dontEmit = true;
4830 verbose ("force enable \"%s\" as base parser\n", base_parser->def->name);
4831 }
4832
4833 {
4834 subparser *tmp;
4835
4836 verbose ("scheduleRunningBaseparser %s with subparsers: ", base_name);
4837 pushLanguage (base);
4838 foreachSubparser(tmp, true)
4839 {
4841 verbose ("%s ", getLanguageName (t));
4842 }
4843 popLanguage ();
4844 verbose ("\n");
4845 }
4846
4847
4848 makePromise(base_name, THIN_STREAM_SPEC);
4849}
4850
/* Returns the dontEmit flag of the parser for the current input language.
 * NOTE(review): the declaration line (original line 4851) is missing from
 * this listing. */
4852{
4853 langType lang = getInputLanguage();
4854 parserObject *parser = LanguageTable + lang;
4855
4856 return parser->dontEmit;
4857}
4858
4859
/* Returns the first subparser of the current input language when a
 * condition holds, otherwise NULL.
 * NOTE(review): the declaration line and that condition (original lines
 * 4860 and 4866) are missing from this listing. */
4861{
4862 langType current = getInputLanguage ();
4863 parserObject *current_parser = LanguageTable + current;
4864 subparser *s = getFirstSubparser (current_parser->slaveControlBlock);
4865
4867 return s;
4868 else
4869 return NULL;
4870}
4871
/* Prints a table of subparsers to FP, either for every visible language
 * (LANGUAGE == LANG_AUTO) or for LANGUAGE only.
 * NOTE(review): original lines 4876, 4889, 4896 and 4900 are missing from
 * this listing: the body of the first loop, the first line of both
 * table-filling calls, and the first line of the print call. */
4872extern void printLanguageSubparsers (const langType language,
4873 bool withListHeader, bool machinable, FILE *fp)
4874{
4875 for (int i = 0; i < (int) LanguageCount; i++)
4877
4878 struct colprintTable * table = subparserColprintTableNew();
4879 parserObject *parser;
4880
4881 if (language == LANG_AUTO)
4882 {
4883 for (int i = 0; i < (int) LanguageCount; i++)
4884 {
4885 parser = LanguageTable + i;
/* Parsers marked invisible are left out of the listing. */
4886 if (parser->def->invisible)
4887 continue;
4888
4890 parser->slaveControlBlock);
4891 }
4892 }
4893 else
4894 {
4895 parser = (LanguageTable + language);
4897 parser->slaveControlBlock);
4898 }
4899
4901 withListHeader, machinable,
4902 fp);
4903 colprintTableDelete (table);
4904}
4905
/* Creates a flags table, prints it to FP and deletes it.
 * NOTE(review): the statement that fills the table (original line 4912) is
 * missing from this listing. */
4906extern void printLangdefFlags (bool withListHeader, bool machinable, FILE *fp)
4907{
4908 struct colprintTable * table;
4909
4910 table = flagsColprintTableNew ();
4911
4913
4914 flagsColprintTablePrint (table, withListHeader, machinable, fp);
4915 colprintTableDelete(table);
4916}
4917
/* Creates a flags table, prints it to FP and deletes it.
 * NOTE(review): the statement that fills the table (original line 4924) is
 * missing from this listing. */
4918extern void printKinddefFlags (bool withListHeader, bool machinable, FILE *fp)
4919{
4920 struct colprintTable * table;
4921
4922 table = flagsColprintTableNew ();
4923
4925
4926 flagsColprintTablePrint (table, withListHeader, machinable, fp);
4927 colprintTableDelete(table);
4928}
4929
/* NOTE(review): the declaration line and the statement that uses `parser`
 * (original lines 4930 and 4933) are missing from this listing. */
4931{
4932 parserObject* const parser = LanguageTable + language;
4934}
4935
/* NOTE(review): the statement that uses `parser` and NAME (original line
 * 4939) is missing from this listing. */
4936extern void addLanguageRegexTable (const langType language, const char *name)
4937{
4938 parserObject* const parser = LanguageTable + language;
4940}
4941
4942extern void addLanguageTagMultiTableRegex(const langType language,
4943 const char* const table_name,
4944 const char* const regex,
4945 const char* const name, const char* const kinds, const char* const flags,
4946 bool *disabled)
4947{
4948 parserObject* const parser = LanguageTable + language;
4949 addTagMultiTableRegex (parser->lregexControlBlock, table_name, regex,
4950 name, kinds, flags, disabled);
4951}
4952
4953extern bool processPretendOption (const char *const option, const char *const parameter)
4954{
4955 langType new_language, old_language;
4956
4957#define pretendOptionPrefix "_pretend-"
4958 new_language = getLanguageComponentInOptionFull (option, pretendOptionPrefix, true);
4959 if (new_language == LANG_IGNORE)
4960 return false;
4961
4962 if (parameter == NULL || parameter[0] == '\0')
4963 error (FATAL, "A parameter is needed after \"%s\" option", option);
4964
4965 old_language = getNamedLanguageFull (parameter, 0, true);
4966 if (old_language == LANG_IGNORE)
4967 error (FATAL, "Unknown language \"%s\" in option \"--%s=%s\"",
4968 parameter, option, parameter);
4969
4970 if (LanguageTable [new_language].pretendingAsLanguage != LANG_IGNORE)