|
|
|
@ -1,5 +1,5 @@ |
|
|
|
/* |
|
|
|
/* |
|
|
|
* Copyright 2002-2008 the original author or authors. |
|
|
|
* Copyright 2002-2017 the original author or authors. |
|
|
|
* |
|
|
|
* |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
* you may not use this file except in compliance with the License. |
|
|
|
@ -45,90 +45,88 @@ class HtmlCharacterEntityDecoder { |
|
|
|
/**
 * Create a decoder for a single piece of text.
 * @param characterEntityReferences the lookup consulted when a named
 * reference has to be converted to a character
 * @param original the text to decode; its length is used as the initial
 * capacity of the output buffer
 */
public HtmlCharacterEntityDecoder(HtmlCharacterEntityReferences characterEntityReferences, String original) {
    int initialCapacity = original.length();
    this.characterEntityReferences = characterEntityReferences;
    this.originalMessage = original;
    this.decodedMessage = new StringBuilder(initialCapacity);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public String decode() { |
|
|
|
public String decode() { |
|
|
|
while (currentPosition < originalMessage.length()) { |
|
|
|
while (this.currentPosition < this.originalMessage.length()) { |
|
|
|
findNextPotentialReference(currentPosition); |
|
|
|
findNextPotentialReference(this.currentPosition); |
|
|
|
copyCharactersTillPotentialReference(); |
|
|
|
copyCharactersTillPotentialReference(); |
|
|
|
processPossibleReference(); |
|
|
|
processPossibleReference(); |
|
|
|
} |
|
|
|
} |
|
|
|
return decodedMessage.toString(); |
|
|
|
return this.decodedMessage.toString(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private void findNextPotentialReference(int startPosition) { |
|
|
|
private void findNextPotentialReference(int startPosition) { |
|
|
|
nextPotentialReferencePosition = Math.max(startPosition, nextSemicolonPosition - MAX_REFERENCE_SIZE); |
|
|
|
this.nextPotentialReferencePosition = Math.max(startPosition, this.nextSemicolonPosition - MAX_REFERENCE_SIZE); |
|
|
|
|
|
|
|
|
|
|
|
do { |
|
|
|
do { |
|
|
|
nextPotentialReferencePosition = |
|
|
|
this.nextPotentialReferencePosition = |
|
|
|
originalMessage.indexOf('&', nextPotentialReferencePosition); |
|
|
|
this.originalMessage.indexOf('&', this.nextPotentialReferencePosition); |
|
|
|
|
|
|
|
|
|
|
|
if (nextSemicolonPosition != -1 && |
|
|
|
if (this.nextSemicolonPosition != -1 && |
|
|
|
nextSemicolonPosition < nextPotentialReferencePosition) |
|
|
|
this.nextSemicolonPosition < this.nextPotentialReferencePosition) |
|
|
|
nextSemicolonPosition = originalMessage.indexOf(';', nextPotentialReferencePosition + 1); |
|
|
|
this.nextSemicolonPosition = this.originalMessage.indexOf(';', this.nextPotentialReferencePosition + 1); |
|
|
|
|
|
|
|
|
|
|
|
boolean isPotentialReference = |
|
|
|
boolean isPotentialReference = (this.nextPotentialReferencePosition != -1 && |
|
|
|
nextPotentialReferencePosition != -1 |
|
|
|
this.nextSemicolonPosition != -1 && |
|
|
|
&& nextSemicolonPosition != -1 |
|
|
|
this.nextPotentialReferencePosition - this.nextSemicolonPosition < MAX_REFERENCE_SIZE); |
|
|
|
&& nextPotentialReferencePosition - nextSemicolonPosition < MAX_REFERENCE_SIZE; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (isPotentialReference) { |
|
|
|
if (isPotentialReference) { |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
if (nextPotentialReferencePosition == -1) { |
|
|
|
if (this.nextPotentialReferencePosition == -1) { |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
if (nextSemicolonPosition == -1) { |
|
|
|
if (this.nextSemicolonPosition == -1) { |
|
|
|
nextPotentialReferencePosition = -1; |
|
|
|
this.nextPotentialReferencePosition = -1; |
|
|
|
break; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
nextPotentialReferencePosition = nextPotentialReferencePosition + 1; |
|
|
|
this.nextPotentialReferencePosition = this.nextPotentialReferencePosition + 1; |
|
|
|
} |
|
|
|
} |
|
|
|
while (nextPotentialReferencePosition != -1); |
|
|
|
while (this.nextPotentialReferencePosition != -1); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private void copyCharactersTillPotentialReference() { |
|
|
|
private void copyCharactersTillPotentialReference() { |
|
|
|
if (nextPotentialReferencePosition != currentPosition) { |
|
|
|
if (this.nextPotentialReferencePosition != this.currentPosition) { |
|
|
|
int skipUntilIndex = nextPotentialReferencePosition != -1 ? |
|
|
|
int skipUntilIndex = (this.nextPotentialReferencePosition != -1 ? |
|
|
|
nextPotentialReferencePosition : originalMessage.length(); |
|
|
|
this.nextPotentialReferencePosition : this.originalMessage.length()); |
|
|
|
if (skipUntilIndex - currentPosition > 3) { |
|
|
|
if (skipUntilIndex - this.currentPosition > 3) { |
|
|
|
decodedMessage.append(originalMessage.substring(currentPosition, skipUntilIndex)); |
|
|
|
this.decodedMessage.append(this.originalMessage.substring(this.currentPosition, skipUntilIndex)); |
|
|
|
currentPosition = skipUntilIndex; |
|
|
|
this.currentPosition = skipUntilIndex; |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
else { |
|
|
|
while (currentPosition < skipUntilIndex) |
|
|
|
while (this.currentPosition < skipUntilIndex) |
|
|
|
decodedMessage.append(originalMessage.charAt(currentPosition++)); |
|
|
|
this.decodedMessage.append(this.originalMessage.charAt(this.currentPosition++)); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private void processPossibleReference() { |
|
|
|
private void processPossibleReference() { |
|
|
|
if (nextPotentialReferencePosition != -1) { |
|
|
|
if (this.nextPotentialReferencePosition != -1) { |
|
|
|
boolean isNumberedReference = originalMessage.charAt(currentPosition + 1) == '#'; |
|
|
|
boolean isNumberedReference = (this.originalMessage.charAt(this.currentPosition + 1) == '#'); |
|
|
|
boolean wasProcessable = isNumberedReference ? processNumberedReference() : processNamedReference(); |
|
|
|
boolean wasProcessable = isNumberedReference ? processNumberedReference() : processNamedReference(); |
|
|
|
if (wasProcessable) { |
|
|
|
if (wasProcessable) { |
|
|
|
currentPosition = nextSemicolonPosition + 1; |
|
|
|
this.currentPosition = this.nextSemicolonPosition + 1; |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
else { |
|
|
|
char currentChar = originalMessage.charAt(currentPosition); |
|
|
|
char currentChar = this.originalMessage.charAt(this.currentPosition); |
|
|
|
decodedMessage.append(currentChar); |
|
|
|
this.decodedMessage.append(currentChar); |
|
|
|
currentPosition++; |
|
|
|
this.currentPosition++; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private boolean processNumberedReference() { |
|
|
|
private boolean processNumberedReference() { |
|
|
|
boolean isHexNumberedReference = |
|
|
|
char referenceChar = this.originalMessage.charAt(this.nextPotentialReferencePosition + 2); |
|
|
|
originalMessage.charAt(nextPotentialReferencePosition + 2) == 'x' || |
|
|
|
boolean isHexNumberedReference = (referenceChar == 'x' || referenceChar == 'X'); |
|
|
|
originalMessage.charAt(nextPotentialReferencePosition + 2) == 'X'; |
|
|
|
|
|
|
|
try { |
|
|
|
try { |
|
|
|
int value = (!isHexNumberedReference) ? |
|
|
|
int value = (!isHexNumberedReference ? |
|
|
|
Integer.parseInt(getReferenceSubstring(2)) : |
|
|
|
Integer.parseInt(getReferenceSubstring(2)) : |
|
|
|
Integer.parseInt(getReferenceSubstring(3), 16); |
|
|
|
Integer.parseInt(getReferenceSubstring(3), 16)); |
|
|
|
decodedMessage.append((char) value); |
|
|
|
this.decodedMessage.append((char) value); |
|
|
|
return true; |
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
catch (NumberFormatException ex) { |
|
|
|
catch (NumberFormatException ex) { |
|
|
|
@ -138,16 +136,17 @@ class HtmlCharacterEntityDecoder { |
|
|
|
|
|
|
|
|
|
|
|
private boolean processNamedReference() { |
|
|
|
private boolean processNamedReference() { |
|
|
|
String referenceName = getReferenceSubstring(1); |
|
|
|
String referenceName = getReferenceSubstring(1); |
|
|
|
char mappedCharacter = characterEntityReferences.convertToCharacter(referenceName); |
|
|
|
char mappedCharacter = this.characterEntityReferences.convertToCharacter(referenceName); |
|
|
|
if (mappedCharacter != HtmlCharacterEntityReferences.CHAR_NULL) { |
|
|
|
if (mappedCharacter != HtmlCharacterEntityReferences.CHAR_NULL) { |
|
|
|
decodedMessage.append(mappedCharacter); |
|
|
|
this.decodedMessage.append(mappedCharacter); |
|
|
|
return true; |
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
return false; |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String getReferenceSubstring(int referenceOffset) { |
|
|
|
private String getReferenceSubstring(int referenceOffset) { |
|
|
|
return originalMessage.substring(nextPotentialReferencePosition + referenceOffset, nextSemicolonPosition); |
|
|
|
return this.originalMessage.substring( |
|
|
|
|
|
|
|
this.nextPotentialReferencePosition + referenceOffset, this.nextSemicolonPosition); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|