/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.escape;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import java.util.Map;
import javax.annotation.CheckForNull;

/**
 * A {@link CharEscaper} backed by an array that gives fast lookup of the replacement for a given
 * {@code char} value. On top of the array, a safe range decides what happens to {@code char}
 * values that have no specific replacement: values inside the range are left unescaped, values
 * outside it are escaped in a general way.
 *
 * <p>Java source code escaping is a good example of where this class fits: the replacement array
 * holds the special ASCII characters such as {@code \\t} and {@code \\n}, while {@link
 * #escapeUnsafe} is overridden to produce general escapes of the form {@code \\uxxxx}.
 *
 * <p>The data structure used by {@link ArrayBasedCharEscaper} grows in proportion to the highest
 * valued character that requires escaping. For example a replacement map containing the single
 * character '{@code \}{@code u1000}' will require approximately 16K of memory. When several
 * escaper instances need the same character replacement mapping, consider using {@link
 * ArrayBasedEscaperMap}.
 *
 * @author Sven Mawson
 * @author David Beaumont
 * @since 15.0
 */
@Beta
@GwtCompatible
@ElementTypesAreNonnullByDefault
public abstract class ArrayBasedCharEscaper extends CharEscaper {
  // Replacement table indexed by char value (see ArrayBasedEscaperMap).
  private final char[][] replacements;
  // Cached length of the replacement table.
  private final int replacementsLength;
  // Lowest character of the safe range.
  private final char safeMin;
  // Highest character of the safe range.
  private final char safeMax;

  /**
   * Creates a new ArrayBasedCharEscaper from a replacement map and a safe range. When {@code
   * safeMax < safeMin} no characters are considered safe.
   *
   * <p>A character without a mapped replacement is checked against the safe range: inside the
   * range it is left alone, outside the range {@link #escapeUnsafe} is called.
   *
   * @param replacementMap a map of characters to their escaped representations
   * @param safeMin the lowest character value in the safe range
   * @param safeMax the highest character value in the safe range
   */
  protected ArrayBasedCharEscaper(
      Map<Character, String> replacementMap, char safeMin, char safeMax) {
    this(ArrayBasedEscaperMap.create(replacementMap), safeMin, safeMax);
  }

  /**
   * Creates a new ArrayBasedCharEscaper from an existing escaper map and a safe range. When
   * {@code safeMax < safeMin} no characters are considered safe. This initializer is the one to
   * use when explicit ArrayBasedEscaperMap instances are kept around so that large replacement
   * mappings can be shared.
   *
   * <p>A character without a mapped replacement is checked against the safe range: inside the
   * range it is left alone, outside the range {@link #escapeUnsafe} is called.
   *
   * @param escaperMap the mapping of characters to be escaped
   * @param safeMin the lowest character value in the safe range
   * @param safeMax the highest character value in the safe range
   */
  protected ArrayBasedCharEscaper(ArrayBasedEscaperMap escaperMap, char safeMin, char safeMax) {
    checkNotNull(escaperMap); // GWT specific check (do not optimize)
    char[][] table = escaperMap.getReplacementArray();
    this.replacements = table;
    this.replacementsLength = table.length;

    // An empty safe range is stored with its limits at opposite extremes, so that the first
    // test against either limit will (almost certainly) fail.
    boolean emptySafeRange = safeMax < safeMin;
    this.safeMin = emptySafeRange ? Character.MAX_VALUE : safeMin;
    this.safeMax = emptySafeRange ? Character.MIN_VALUE : safeMax;
  }

  /*
   * Overridden for speed: rough benchmarking shows this almost doubles throughput on strings
   * that need no escaping at all. The slow path is only entered at the first character that has
   * a replacement or falls outside the safe range.
   */
  @Override
  public final String escape(String s) {
    checkNotNull(s); // GWT specific check (do not optimize).
    int index = 0;
    while (index < s.length()) {
      char current = s.charAt(index);
      boolean hasReplacement = current < replacementsLength && replacements[current] != null;
      if (hasReplacement || current < safeMin || current > safeMax) {
        return escapeSlow(s, index);
      }
      index++;
    }
    return s;
  }

  /**
   * Escapes one character by consulting the replacement array first and the safe range second.
   * A character with no explicit replacement that lies outside the safe range is handed to
   * {@link #escapeUnsafe}.
   *
   * @return the replacement characters, or {@code null} if no escaping was required
   */
  @Override
  @CheckForNull
  protected final char[] escape(char c) {
    // Characters beyond the end of the table have no explicit replacement.
    char[] mapped = (c < replacementsLength) ? replacements[c] : null;
    if (mapped != null) {
      return mapped;
    }
    boolean withinSafeRange = safeMin <= c && c <= safeMax;
    return withinSafeRange ? null : escapeUnsafe(c);
  }

  /**
   * Escapes a {@code char} value that has no direct explicit value in the replacement array and
   * lies outside the stated safe range. Subclasses override this method to supply generalized
   * escaping for characters.
   *
   * <p>Arrays returned by this method must not be modified once they have been returned. It is
   * however acceptable to return the same array multiple times (even for different input
   * characters).
   *
   * @param c the character to escape
   * @return the replacement characters, or {@code null} if no escaping was required
   */
  // TODO(dbeaumont,cpovirk): Rename this something better once refactoring done
  @CheckForNull
  protected abstract char[] escapeUnsafe(char c);
}