From 9a64d3bf3fc110d255d36a9692ea1366c3ef0045 Mon Sep 17 00:00:00 2001 From: Phillip Webb Date: Thu, 29 Mar 2018 13:51:41 -0700 Subject: [PATCH] Fix AsciiBytes unicode decoding Fix the decoding logic in the AsciiBytes `hashCode` and `matches` to correctly deal with multi-byte encodings. Fixes gh-12504 --- .../boot/loader/jar/AsciiBytes.java | 38 ++++++++++--------- .../boot/loader/jar/AsciiBytesTests.java | 14 ++++++- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/spring-boot-project/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java b/spring-boot-project/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java index dcf4da07f4f..c2c36451ffe 100644 --- a/spring-boot-project/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java +++ b/spring-boot-project/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java @@ -29,7 +29,9 @@ final class AsciiBytes { private static final String EMPTY_STRING = ""; - private static final int[] EXCESS = { 0x0, 0x1080, 0x96, 0x1c82080 }; + private static final int[] INITIAL_BYTE_BITMASK = { 0x7F, 0x1F, 0x0F, 0x07 }; + + private static final int SUBSEQUENT_BYTE_BITMASK = 0x3F; private final byte[] bytes; @@ -142,13 +144,10 @@ final class AsciiBytes { int totalLen = (nameLen + (suffix == 0 ? 0 : 1)); for (int i = this.offset; i < this.offset + this.length; i++) { int b = this.bytes[i]; - if (b < 0) { - b = b & 0x7F; - int limit = getRemainingUtfBytes(b); - for (int j = 0; j < limit; j++) { - b = (b << 6) + (this.bytes[++i] & 0xFF); - } - b -= EXCESS[limit]; + int remainingUtfBytes = getNumberOfUtfBytes(b) - 1; + b &= INITIAL_BYTE_BITMASK[remainingUtfBytes]; + for (int j = 0; j < remainingUtfBytes; j++) { + b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK); } char c = getChar(name, suffix, charIndex++); if (b <= 0xFFFF) { @@ -185,13 +184,10 @@ final class AsciiBytes { if (hash == 0 && this.bytes.length > 0) { for (int i = this.offset; i < this.offset + this.length; i++) { int b = this.bytes[i]; - if (b < 0) { - b = b & 0x7F; - int limit = getRemainingUtfBytes(b); - for (int j = 0; j < limit; j++) { - b = (b << 6) + (this.bytes[++i] & 0xFF); - } - b -= EXCESS[limit]; + int remainingUtfBytes = getNumberOfUtfBytes(b) - 1; + b &= INITIAL_BYTE_BITMASK[remainingUtfBytes]; + for (int j = 0; j < remainingUtfBytes; j++) { + b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK); } if (b <= 0xFFFF) { hash = 31 * hash + b; @@ -206,8 +202,16 @@ final class AsciiBytes { return hash; } - private int getRemainingUtfBytes(int b) { - return (b < 96 ? 1 : (b < 112 ? 2 : 3)); + private int getNumberOfUtfBytes(int b) { + if ((b & 0x80) == 0) { + return 1; + } + int numberOfUtfBytes = 0; + while ((b & 0x80) != 0) { + b <<= 1; + numberOfUtfBytes++; + } + return numberOfUtfBytes; } @Override diff --git a/spring-boot-project/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java b/spring-boot-project/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java index e26cb170964..059c7d74161 100644 --- a/spring-boot-project/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java +++ b/spring-boot-project/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java @@ -1,5 +1,5 @@ /* - * Copyright 2012-2017 the original author or authors. + * Copyright 2012-2018 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -184,6 +184,18 @@ public class AsciiBytesTests { matchesSameAsString("\ud83d\udca9"); } + @Test + public void hashCodeFromInstanceMatchesHashCodeFromString() { + String name = "fonts/宋体/simsun.ttf"; + assertThat(new AsciiBytes(name).hashCode()).isEqualTo(AsciiBytes.hashCode(name)); + } + + @Test + public void instanceCreatedFromCharSequenceMatchesSameCharSequence() { + String name = "fonts/宋体/simsun.ttf"; + assertThat(new AsciiBytes(name).matches(name, NO_SUFFIX)).isTrue(); + } + private void matchesSameAsString(String input) { assertThat(new AsciiBytes(input).matches(input, NO_SUFFIX)).isTrue(); }