@ -30,7 +30,6 @@ import java.nio.channels.WritableByteChannel;
import java.nio.file.OpenOption ;
import java.nio.file.OpenOption ;
import java.nio.file.Path ;
import java.nio.file.Path ;
import java.nio.file.StandardOpenOption ;
import java.nio.file.StandardOpenOption ;
import java.util.Arrays ;
import java.util.HashSet ;
import java.util.HashSet ;
import java.util.Set ;
import java.util.Set ;
import java.util.concurrent.Callable ;
import java.util.concurrent.Callable ;
@ -568,65 +567,290 @@ public abstract class DataBufferUtils {
/ * *
/ * *
* Return a { @link Matcher } for the given delimiter .
* Return a { @link Matcher } for the given delimiter .
* The matcher can be used to find the delimiters in data buffers .
* The matcher can be used to find the delimiters in a stream of data buffers .
* @param delimiter the delimiter bytes to find
* @param delimiter the delimiter bytes to find
* @return the matcher
* @return the matcher
* @since 5 . 2
* @since 5 . 2
* /
* /
public static Matcher matcher ( byte [ ] delimiter ) {
public static Matcher matcher ( byte [ ] delimiter ) {
Assert . isTrue ( delimiter . length > 0 , "Delimiter must not be empty" ) ;
return createMatcher ( delimiter ) ;
return new KnuthMorrisPrattMatcher ( delimiter ) ;
}
}
/ * * Return a { @link Matcher } for the given delimiters .
/ * *
* The matcher can be used to find the delimiters in data buffers .
* Return a { @link Matcher } for the given delimiters .
* The matcher can be used to find the delimiters in a stream of data buffers .
* @param delimiters the delimiters bytes to find
* @param delimiters the delimiters bytes to find
* @return the matcher
* @return the matcher
* @since 5 . 2
* @since 5 . 2
* /
* /
public static Matcher matcher ( byte [ ] . . . delimiters ) {
public static Matcher matcher ( byte [ ] . . . delimiters ) {
Assert . isTrue ( delimiters . length > 0 , "Delimiters must not be empty" ) ;
Assert . isTrue ( delimiters . length > 0 , "Delimiters must not be empty" ) ;
if ( delimiters . length = = 1 ) {
return ( delimiters . length = = 1 ? createMatcher ( delimiters [ 0 ] ) : new CompositeMatcher ( delimiters ) ) ;
return matcher ( delimiters [ 0 ] ) ;
}
}
else {
private static NestedMatcher createMatcher ( byte [ ] delimiter ) {
Matcher [ ] matchers = new Matcher [ delimiters . length ] ;
Assert . isTrue ( delimiter . length > 0 , "Delimiter must not be empty" ) ;
for ( int i = 0 ; i < delimiters . length ; i + + ) {
switch ( delimiter . length ) {
matchers [ i ] = matcher ( delimiters [ i ] ) ;
case 1 :
}
return ( delimiter [ 0 ] = = 10 ? SingleByteMatcher . NEWLINE_MATCHER : new SingleByteMatcher ( delimiter ) ) ;
return new CompositeMatcher ( matchers ) ;
case 2 :
return new TwoByteMatcher ( delimiter ) ;
default :
return new KnuthMorrisPrattMatcher ( delimiter ) ;
}
}
}
}
/ * *
/ * *
* Defines an object that matches a data buffer against a delimiter .
* Contract to find delimiter ( s ) against one or more data buffers that can
* be passed one at a time to the { @link # match ( DataBuffer ) } method .
*
* @since 5 . 2
* @since 5 . 2
* @see # match ( DataBuffer )
* @see # match ( DataBuffer )
* /
* /
public interface Matcher {
public interface Matcher {
/ * *
/ * *
* Returns the position of the final matching delimiter byte that matches the given buffer ,
* Find the first matching delimiter and return the index of the last
* or { @code - 1 } if not found .
* byte of the delimiter , or { @code - 1 } if not found .
* @param dataBuffer the buffer in which to search for the delimiter
* @return the position of the final matching delimiter , or { @code - 1 } if not found .
* /
* /
int match ( DataBuffer dataBuffer ) ;
int match ( DataBuffer dataBuffer ) ;
/ * *
/ * *
* Return the delimiter used for this matcher .
* Return the delimiter from the last invocation of { @link # match ( DataBuffer ) } .
* @return the delimiter
* /
* /
byte [ ] delimiter ( ) ;
byte [ ] delimiter ( ) ;
/ * *
/ * *
* Resets the state of this matcher .
* Reset the state of this matcher .
* /
* /
void reset ( ) ;
void reset ( ) ;
}
}
/ * *
* Matcher that supports searching for multiple delimiters .
* /
private static class CompositeMatcher implements Matcher {
private static final byte [ ] NO_DELIMITER = new byte [ 0 ] ;
private final NestedMatcher [ ] matchers ;
byte [ ] longestDelimiter = NO_DELIMITER ;
CompositeMatcher ( byte [ ] [ ] delimiters ) {
this . matchers = initMatchers ( delimiters ) ;
}
private static NestedMatcher [ ] initMatchers ( byte [ ] [ ] delimiters ) {
NestedMatcher [ ] matchers = new NestedMatcher [ delimiters . length ] ;
for ( int i = 0 ; i < delimiters . length ; i + + ) {
matchers [ i ] = createMatcher ( delimiters [ i ] ) ;
}
return matchers ;
}
@Override
public int match ( DataBuffer dataBuffer ) {
this . longestDelimiter = NO_DELIMITER ;
for ( int pos = dataBuffer . readPosition ( ) ; pos < dataBuffer . writePosition ( ) ; pos + + ) {
byte b = dataBuffer . getByte ( pos ) ;
for ( NestedMatcher matcher : this . matchers ) {
if ( matcher . match ( b ) & & matcher . delimiter ( ) . length > this . longestDelimiter . length ) {
this . longestDelimiter = matcher . delimiter ( ) ;
}
}
if ( this . longestDelimiter ! = NO_DELIMITER ) {
reset ( ) ;
return pos ;
}
}
return - 1 ;
}
@Override
public byte [ ] delimiter ( ) {
Assert . state ( this . longestDelimiter ! = NO_DELIMITER , "Illegal state!" ) ;
return this . longestDelimiter ;
}
@Override
public void reset ( ) {
for ( NestedMatcher matcher : this . matchers ) {
matcher . reset ( ) ;
}
}
}
/**
 * A {@link Matcher} that can additionally be driven one byte at a time.
 * This is what lets {@link CompositeMatcher} feed the same byte to several
 * matchers so that they all advance over a shared position.
 */
private interface NestedMatcher extends Matcher {
/**
 * Consume the next byte of the stream.
 * @param b the next byte
 * @return {@code true} if this byte completes the delimiter,
 * {@code false} otherwise
 */
boolean match ( byte b ) ;
}
/ * *
* Matcher for a single byte delimiter .
* /
private static class SingleByteMatcher implements NestedMatcher {
static SingleByteMatcher NEWLINE_MATCHER = new SingleByteMatcher ( new byte [ ] { 10 } ) ;
private final byte [ ] delimiter ;
SingleByteMatcher ( byte [ ] delimiter ) {
Assert . isTrue ( delimiter . length = = 1 , "Expected a 1 byte delimiter" ) ;
this . delimiter = delimiter ;
}
@Override
public int match ( DataBuffer dataBuffer ) {
for ( int pos = dataBuffer . readPosition ( ) ; pos < dataBuffer . writePosition ( ) ; pos + + ) {
byte b = dataBuffer . getByte ( pos ) ;
if ( match ( b ) ) {
return pos ;
}
}
return - 1 ;
}
@Override
public boolean match ( byte b ) {
return this . delimiter [ 0 ] = = b ;
}
@Override
public byte [ ] delimiter ( ) {
return this . delimiter ;
}
@Override
public void reset ( ) {
}
}
/ * *
* Base class for a { @link NestedMatcher } .
* /
private static abstract class AbstractNestedMatcher implements NestedMatcher {
private final byte [ ] delimiter ;
private int matches = 0 ;
protected AbstractNestedMatcher ( byte [ ] delimiter ) {
this . delimiter = delimiter ;
}
protected void setMatches ( int index ) {
this . matches = index ;
}
protected int getMatches ( ) {
return this . matches ;
}
@Override
public int match ( DataBuffer dataBuffer ) {
for ( int pos = dataBuffer . readPosition ( ) ; pos < dataBuffer . writePosition ( ) ; pos + + ) {
byte b = dataBuffer . getByte ( pos ) ;
if ( match ( b ) ) {
reset ( ) ;
return pos ;
}
}
return - 1 ;
}
@Override
public boolean match ( byte b ) {
if ( b = = this . delimiter [ this . matches ] ) {
this . matches + + ;
return ( this . matches = = delimiter ( ) . length ) ;
}
return false ;
}
@Override
public byte [ ] delimiter ( ) {
return this . delimiter ;
}
@Override
public void reset ( ) {
this . matches = 0 ;
}
}
/**
 * Matcher for a delimiter that is exactly two bytes long, a length that does
 * not benefit from a Knuth-Morris-Pratt suffix-prefix table.
 * <p>Declares no matching logic of its own; everything is inherited from
 * {@link AbstractNestedMatcher}.
 */
private static class TwoByteMatcher extends AbstractNestedMatcher {
/**
 * @param delimiter the delimiter; asserted to contain exactly two bytes
 */
protected TwoByteMatcher ( byte [ ] delimiter ) {
super ( delimiter ) ;
// The length check runs after the superclass has already stored the array.
Assert . isTrue ( delimiter . length = = 2 , "Expected a 2-byte delimiter" ) ;
}
}
/ * *
* Implementation of { @link Matcher } that uses the Knuth - Morris - Pratt algorithm .
* @see < a href = "https://www.nayuki.io/page/knuth-morris-pratt-string-matching" > Knuth - Morris - Pratt string matching < / a >
* /
private static class KnuthMorrisPrattMatcher extends AbstractNestedMatcher {
private final int [ ] table ;
public KnuthMorrisPrattMatcher ( byte [ ] delimiter ) {
super ( delimiter ) ;
this . table = longestSuffixPrefixTable ( delimiter ) ;
}
private static int [ ] longestSuffixPrefixTable ( byte [ ] delimiter ) {
int [ ] result = new int [ delimiter . length ] ;
result [ 0 ] = 0 ;
for ( int i = 1 ; i < delimiter . length ; i + + ) {
int j = result [ i - 1 ] ;
while ( j > 0 & & delimiter [ i ] ! = delimiter [ j ] ) {
j = result [ j - 1 ] ;
}
if ( delimiter [ i ] = = delimiter [ j ] ) {
j + + ;
}
result [ i ] = j ;
}
return result ;
}
@Override
public boolean match ( byte b ) {
while ( getMatches ( ) > 0 & & b ! = delimiter ( ) [ getMatches ( ) ] ) {
setMatches ( this . table [ getMatches ( ) - 1 ] ) ;
}
return super . match ( b ) ;
}
}
private static class ReadableByteChannelGenerator implements Consumer < SynchronousSink < DataBuffer > > {
private static class ReadableByteChannelGenerator implements Consumer < SynchronousSink < DataBuffer > > {
private final ReadableByteChannel channel ;
private final ReadableByteChannel channel ;
@ -908,124 +1132,4 @@ public abstract class DataBufferUtils {
}
}
}
}
/ * *
* Implementation of { @link Matcher } that uses the Knuth - Morris - Pratt algorithm .
* @see < a href = "https://www.nayuki.io/page/knuth-morris-pratt-string-matching" > Knuth - Morris - Pratt string matching < / a >
* /
private static class KnuthMorrisPrattMatcher implements Matcher {
private final byte [ ] delimiter ;
private final int [ ] table ;
private int matches = 0 ;
public KnuthMorrisPrattMatcher ( byte [ ] delimiter ) {
this . delimiter = Arrays . copyOf ( delimiter , delimiter . length ) ;
this . table = longestSuffixPrefixTable ( delimiter ) ;
}
private static int [ ] longestSuffixPrefixTable ( byte [ ] delimiter ) {
int [ ] result = new int [ delimiter . length ] ;
result [ 0 ] = 0 ;
for ( int i = 1 ; i < delimiter . length ; i + + ) {
int j = result [ i - 1 ] ;
while ( j > 0 & & delimiter [ i ] ! = delimiter [ j ] ) {
j = result [ j - 1 ] ;
}
if ( delimiter [ i ] = = delimiter [ j ] ) {
j + + ;
}
result [ i ] = j ;
}
return result ;
}
@Override
public int match ( DataBuffer dataBuffer ) {
for ( int i = dataBuffer . readPosition ( ) ; i < dataBuffer . writePosition ( ) ; i + + ) {
byte b = dataBuffer . getByte ( i ) ;
while ( this . matches > 0 & & b ! = this . delimiter [ this . matches ] ) {
this . matches = this . table [ this . matches - 1 ] ;
}
if ( b = = this . delimiter [ this . matches ] ) {
this . matches + + ;
if ( this . matches = = this . delimiter . length ) {
reset ( ) ;
return i ;
}
}
}
return - 1 ;
}
@Override
public byte [ ] delimiter ( ) {
return Arrays . copyOf ( this . delimiter , this . delimiter . length ) ;
}
@Override
public void reset ( ) {
this . matches = 0 ;
}
}
/ * *
* Implementation of { @link Matcher } that wraps several other matchers .
* /
private static class CompositeMatcher implements Matcher {
private static final byte [ ] NO_DELIMITER = new byte [ 0 ] ;
private final Matcher [ ] matchers ;
byte [ ] longestDelimiter = NO_DELIMITER ;
public CompositeMatcher ( Matcher [ ] matchers ) {
this . matchers = matchers ;
}
@Override
public int match ( DataBuffer dataBuffer ) {
this . longestDelimiter = NO_DELIMITER ;
int bestEndIdx = Integer . MAX_VALUE ;
for ( Matcher matcher : this . matchers ) {
int endIdx = matcher . match ( dataBuffer ) ;
if ( endIdx ! = - 1 & &
endIdx < = bestEndIdx & &
matcher . delimiter ( ) . length > this . longestDelimiter . length ) {
bestEndIdx = endIdx ;
this . longestDelimiter = matcher . delimiter ( ) ;
}
}
if ( bestEndIdx = = Integer . MAX_VALUE ) {
this . longestDelimiter = NO_DELIMITER ;
return - 1 ;
}
else {
reset ( ) ;
return bestEndIdx ;
}
}
@Override
public byte [ ] delimiter ( ) {
Assert . state ( this . longestDelimiter ! = NO_DELIMITER , "Illegal state!" ) ;
return this . longestDelimiter ;
}
@Override
public void reset ( ) {
for ( Matcher matcher : this . matchers ) {
matcher . reset ( ) ;
}
}
}
}
}