"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java" between
lucene-7.6.0-src.tgz and lucene-7.7.0-src.tgz

About: Lucene is a Java full-text search engine (not a complete application, but rather a code library and API; java source code).

WikipediaTokenizerImpl.java  (lucene-7.6.0-src.tgz):WikipediaTokenizerImpl.java  (lucene-7.7.0-src.tgz)
/* The following code was generated by JFlex 1.6.0 */ /* The following code was generated by JFlex 1.7.0 */
/* /*
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with * (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
skipping to change at line 342 skipping to change at line 342
return j; return j;
} }
/* error codes */ /* error codes */
private static final int ZZ_UNKNOWN_ERROR = 0; private static final int ZZ_UNKNOWN_ERROR = 0;
private static final int ZZ_NO_MATCH = 1; private static final int ZZ_NO_MATCH = 1;
private static final int ZZ_PUSHBACK_2BIG = 2; private static final int ZZ_PUSHBACK_2BIG = 2;
/* error messages for the codes above */ /* error messages for the codes above */
private static final String ZZ_ERROR_MSG[] = { private static final String ZZ_ERROR_MSG[] = {
"Unkown internal scanner error", "Unknown internal scanner error",
"Error: could not match input", "Error: could not match input",
"Error: pushback value was too large" "Error: pushback value was too large"
}; };
/** /**
* ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
*/ */
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 = private static final String ZZ_ATTRIBUTE_PACKED_0 =
skipping to change at line 420 skipping to change at line 420
/** the number of characters up to the start of the matched text */ /** the number of characters up to the start of the matched text */
private int yychar; private int yychar;
/** /**
* the number of characters from the last newline up to the start of the * the number of characters from the last newline up to the start of the
* matched text * matched text
*/ */
private int yycolumn; private int yycolumn;
/** /**
* zzAtBOL == true <=> the scanner is currently at the beginning of a line * zzAtBOL == true iff the scanner is currently at the beginning of a line
*/ */
private boolean zzAtBOL = true; private boolean zzAtBOL = true;
/** zzAtEOF == true <=> the scanner is at the EOF */ /** zzAtEOF == true iff the scanner is at the EOF */
private boolean zzAtEOF; private boolean zzAtEOF;
/** denotes if the user-EOF-code has already been executed */ /** denotes if the user-EOF-code has already been executed */
private boolean zzEOFDone; private boolean zzEOFDone;
/** /**
* The number of occupied positions in zzBuffer beyond zzEndRead. * The number of occupied positions in zzBuffer beyond zzEndRead.
* When a lead/high surrogate has been read from the input stream * When a lead/high surrogate has been read from the input stream
* into the final zzBuffer position, this will have a value of 1; * into the final zzBuffer position, this will have a value of 1;
* otherwise, it will have a value of 0. * otherwise, it will have a value of 0.
skipping to change at line 571 skipping to change at line 571
/* if not: blow it up */ /* if not: blow it up */
char newBuffer[] = new char[zzBuffer.length*2]; char newBuffer[] = new char[zzBuffer.length*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer; zzBuffer = newBuffer;
zzEndRead += zzFinalHighSurrogate; zzEndRead += zzFinalHighSurrogate;
zzFinalHighSurrogate = 0; zzFinalHighSurrogate = 0;
} }
/* fill the buffer with new input */ /* fill the buffer with new input */
int requested = zzBuffer.length - zzEndRead; int requested = zzBuffer.length - zzEndRead;
int totalRead = 0; int numRead = zzReader.read(zzBuffer, zzEndRead, requested);
while (totalRead < requested) {
int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - t
otalRead);
if (numRead == -1) {
break;
}
totalRead += numRead;
}
if (totalRead > 0) { /* not supposed to occur according to specification of java.io.Reader */
zzEndRead += totalRead; if (numRead == 0) {
if (totalRead == requested) { /* possibly more input available */ throw new java.io.IOException("Reader returned 0 characters. See JFlex exa
mples for workaround.");
}
if (numRead > 0) {
zzEndRead += numRead;
/* If numRead == requested, we might have requested to few chars to
encode a full Unicode character. We assume that a Reader would
otherwise never return half characters. */
if (numRead == requested) {
if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) { if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
--zzEndRead; --zzEndRead;
zzFinalHighSurrogate = 1; zzFinalHighSurrogate = 1;
} }
} }
/* potentially more input available */
return false; return false;
} }
// totalRead = 0: End of stream /* numRead < 0 ==> end of stream */
return true; return true;
} }
/** /**
* Closes the input stream. * Closes the input stream.
*/ */
public final void yyclose() throws java.io.IOException { public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */ zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */ zzEndRead = zzStartRead; /* invalidate buffer */
skipping to change at line 804 skipping to change at line 805
zzMarkedPosL = zzCurrentPosL; zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction; if ( (zzAttributes & 8) == 8 ) break zzForAction;
} }
} }
} }
// store back cached position // store back cached position
zzMarkedPos = zzMarkedPosL; zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
case 1: zzAtEOF = true;
{ numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fa return YYEOF;
ll-through warning: */ break; }
} else {
case 47: break; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 2: case 1:
{ positionInc = 1; return ALPHANUM; { numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit
} fall-through warning: */ break;
case 48: break; }
case 3: // fall through
{ positionInc = 1; return CJ; case 47: break;
} case 2:
case 49: break; { positionInc = 1; return ALPHANUM;
case 4: }
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LI // fall through
NK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warni case 48: break;
ng: */ break; case 3:
} { positionInc = 1; return CJ;
case 50: break; }
case 5: // fall through
{ positionInc = 1; /* Break so we don't hit fall-through warning: */ b case 49: break;
reak; case 4:
} { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_
case 51: break; LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through war
case 6: ning: */ break;
{ yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; }
} // fall through
case 52: break; case 50: break;
case 7: case 5:
{ yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTok { positionInc = 1; /* Break so we don't hit fall-through warning: */
Type; break;
} }
case 53: break; // fall through
case 8: case 51: break;
{ /* Break so we don't hit fall-through warning: */ break;/* ignore */ case 6:
} { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokTyp
case 54: break; e;
case 9: }
{ if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWi // fall through
kiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); nu case 52: break;
mLinkToks++; return currentTokType; case 7:
} { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentT
case 55: break; okType;
case 10: }
{ numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we // fall through
don't hit fall-through warning: */ break; case 53: break;
} case 8:
case 56: break; { /* Break so we don't hit fall-through warning: */ break;/* ignore
case 11: */
{ currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break }
so we don't hit fall-through warning: */ break; // fall through
} case 54: break;
case 57: break; case 9:
case 12: { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} num
{ currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); ret WikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE);
urn currentTokType;/*italics*/ numLinkToks++; return currentTokType;
} }
case 58: break; // fall through
case 13: case 55: break;
{ currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTER case 10:
NAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so
} we don't hit fall-through warning: */ break;
case 59: break; }
case 14: // fall through
{ yybegin(STRING); numWikiTokensSeen++; return currentTokType; case 56: break;
} case 11:
case 60: break; { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Bre
case 15: ak so we don't hit fall-through warning: */ break;
{ currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING) }
; /* Break so we don't hit fall-through warning: */ break; // fall through
} case 57: break;
case 61: break; case 12:
case 16: { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); r
{ currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiToken eturn currentTokType;/*italics*/
sSeen++; return currentTokType; }
} // fall through
case 62: break; case 58: break;
case 17: case 13:
{ yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTo { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXT
kType; ERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
} }
case 63: break; // fall through
case 18: case 59: break;
{ /* Break so we don't hit fall-through warning: */ break;/* ignore ST case 14:
RING */ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
} }
case 64: break; // fall through
case 19: case 60: break;
{ yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRIN case 15:
G ALPHANUM*/ { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRIN
} G); /* Break so we don't hit fall-through warning: */ break;
case 65: break; }
case 20: // fall through
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LI case 61: break;
NK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: case 16:
*/ break; { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTok
} ensSeen++; return currentTokType;
case 66: break; }
case 21: // fall through
{ yybegin(STRING); return currentTokType;/*pipe*/ case 62: break;
} case 17:
case 67: break; { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return current
case 22: TokType;
{ numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBal }
anced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we // fall through
don't hit fall-through warning: */ break; case 63: break;
} case 18:
case 68: break; { /* Break so we don't hit fall-through warning: */ break;/* ignore
case 23: STRING */
{ numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE) }
;/* Break so we don't hit fall-through warning: */ break; // fall through
} case 64: break;
case 69: break; case 19:
case 24: { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STR
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LI ING ALPHANUM*/
NK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: }
*/ break; // fall through
} case 65: break;
case 70: break; case 20:
case 25: { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; y LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning
ybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ bre : */ break;
ak; }
} // fall through
case 71: break; case 66: break;
case 26: case 21:
{ yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ { yybegin(STRING); return currentTokType;/*pipe*/
break; }
} // fall through
case 72: break; case 67: break;
case 27: case 22:
{ numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-t { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numB
hrough warning: */ break; alanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so w
} e don't hit fall-through warning: */ break;
case 73: break; }
case 28: // fall through
{ currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTER case 68: break;
NAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; case 23:
} { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STAT
case 74: break; E);/* Break so we don't hit fall-through warning: */ break;
case 29: }
{ currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTE // fall through
RNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; case 69: break;
} case 24:
case 75: break; { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_
case 30: LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning
{ yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: * : */ break;
/ break; }
} // fall through
case 76: break; case 70: break;
case 31: case 25:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Br { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION;
eak so we don't hit fall-through warning: */ break;/*end italics*/ yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ b
} reak;
case 77: break; }
case 32: // fall through
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LI case 71: break;
NK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: case 26:
*/ break; { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning:
} */ break;
case 78: break; }
case 33: // fall through
{ positionInc = 1; return APOSTROPHE; case 72: break;
} case 27:
case 79: break; { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall
case 34: -through warning: */ break;
{ positionInc = 1; return HOST; }
} // fall through
case 80: break; case 73: break;
case 35: case 28:
{ positionInc = 1; return NUM; { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INT
} ERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
case 81: break; }
case 36: // fall through
{ positionInc = 1; return COMPANY; case 74: break;
} case 29:
case 82: break; { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(IN
case 37: TERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
{ currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); / }
* Break so we don't hit fall-through warning: */ break; // fall through
} case 75: break;
case 83: break; case 30:
case 38: { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning:
{ numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Bre */ break;
ak so we don't hit fall-through warning: */ break;/*end bold*/ }
} // fall through
case 84: break; case 76: break;
case 39: case 31:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Br { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /*
eak so we don't hit fall-through warning: */ break;/*end sub header*/ Break so we don't hit fall-through warning: */ break;/*end italics*/
} }
case 85: break; // fall through
case 40: case 77: break;
{ positionInc = 1; return ACRONYM; case 32:
} { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_
case 86: break; LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning
case 41: : */ break;
{ positionInc = 1; return EMAIL; }
} // fall through
case 87: break; case 78: break;
case 42: case 33:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Br { positionInc = 1; return APOSTROPHE;
eak so we don't hit fall-through warning: */ break;/*end bold italics*/ }
} // fall through
case 88: break; case 79: break;
case 43: case 34:
{ positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); { positionInc = 1; return HOST;
return currentTokType; }
} // fall through
case 89: break; case 80: break;
case 44: case 35:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; y { positionInc = 1; return NUM;
ybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break; }
} // fall through
case 90: break; case 81: break;
case 45: case 36:
{ currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_S { positionInc = 1; return COMPANY;
TATE); /* Break so we don't hit fall-through warning: */ break; }
} // fall through
case 91: break; case 82: break;
case 46: case 37:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yy { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);
begin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break; /* Break so we don't hit fall-through warning: */ break;
} }
case 92: break; // fall through
default: case 83: break;
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { case 38:
zzAtEOF = true; { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* B
return YYEOF; reak so we don't hit fall-through warning: */ break;/*end bold*/
} }
else { // fall through
case 84: break;
case 39:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /*
Break so we don't hit fall-through warning: */ break;/*end sub header*/
}
// fall through
case 85: break;
case 40:
{ positionInc = 1; return ACRONYM;
}
// fall through
case 86: break;
case 41:
{ positionInc = 1; return EMAIL;
}
// fall through
case 87: break;
case 42:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /*
Break so we don't hit fall-through warning: */ break;/*end bold italics*/
}
// fall through
case 88: break;
case 43:
{ positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE)
; return currentTokType;
}
// fall through
case 89: break;
case 44:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY;
yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break
;
}
// fall through
case 90: break;
case 45:
{ currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY
_STATE); /* Break so we don't hit fall-through warning: */ break;
}
// fall through
case 91: break;
case 46:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;
yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break
;
}
// fall through
case 92: break;
default:
zzScanError(ZZ_NO_MATCH); zzScanError(ZZ_NO_MATCH);
} }
} }
} }
} }
} }
 End of changes. 10 change blocks. 
250 lines changed or deleted 301 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)