1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22
23 import org.apache.hadoop.hbase.classification.InterfaceAudience;
24 import org.apache.hadoop.hbase.KeyValue;
25 import org.apache.hadoop.hbase.KeyValue.KVComparator;
26 import org.apache.hadoop.hbase.io.hfile.HFileContext;
27
28 /**
29 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
30 * <ul>
31 * <li>the KeyValues are stored sorted by key</li>
32 * <li>we know the structure of KeyValue</li>
33 * <li>the values are always iterated forward from beginning of block</li>
34 * <li>knowledge of Key Value format</li>
35 * </ul>
36 * It is designed to work fast enough to be feasible as in memory compression.
37 *
38 * After encoding, it also optionally compresses the encoded data if a
39 * compression algorithm is specified in HFileBlockEncodingContext argument of
40 * {@link #encodeKeyValues(ByteBuffer, HFileBlockEncodingContext)}.
41 */
42 @InterfaceAudience.Private
43 public interface DataBlockEncoder {
44
45 /**
46 * Encodes KeyValues. It will first encode key value pairs, and then
47 * optionally do the compression for the encoded data.
48 *
49 * @param in
50 * Source of KeyValue for compression.
51 * @param encodingCtx
52 * the encoding context which will contain encoded uncompressed bytes
53 * as well as compressed encoded bytes if compression is enabled, and
54 * also it will reuse resources across multiple calls.
55 * @throws IOException
56 * If there is an error writing to output stream.
57 */
58 void encodeKeyValues(ByteBuffer in, HFileBlockEncodingContext encodingCtx) throws IOException;
59
60 /**
61 * Decode.
62 * @param source Compressed stream of KeyValues.
63 * @param decodingCtx
64 * @return Uncompressed block of KeyValues.
65 * @throws IOException If there is an error in source.
66 */
67 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
68 throws IOException;
69
70 /**
71 * Return first key in block. Useful for indexing. Typically does not make
72 * a deep copy but returns a buffer wrapping a segment of the actual block's
73 * byte array. This is because the first key in block is usually stored
74 * unencoded.
75 * @param block encoded block we want index, the position will not change
76 * @return First key in block.
77 */
78 ByteBuffer getFirstKeyInBlock(ByteBuffer block);
79
80 /**
81 * Create a HFileBlock seeker which find KeyValues within a block.
82 * @param comparator what kind of comparison should be used
83 * @param decodingCtx
84 * @return A newly created seeker.
85 */
86 EncodedSeeker createSeeker(KVComparator comparator,
87 HFileBlockDecodingContext decodingCtx);
88
89 /**
90 * Creates a encoder specific encoding context
91 *
92 * @param encoding
93 * encoding strategy used
94 * @param headerBytes
95 * header bytes to be written, put a dummy header here if the header
96 * is unknown
97 * @param meta
98 * HFile meta data
99 * @return a newly created encoding context
100 */
101 HFileBlockEncodingContext newDataBlockEncodingContext(
102 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
103
104 /**
105 * Creates an encoder specific decoding context, which will prepare the data
106 * before actual decoding
107 *
108 * @param meta
109 * HFile meta data
110 * @return a newly created decoding context
111 */
112 HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
113
114 /**
115 * An interface which enable to seek while underlying data is encoded.
116 *
117 * It works on one HFileBlock, but it is reusable. See
118 * {@link #setCurrentBuffer(ByteBuffer)}.
119 */
120 interface EncodedSeeker {
121 /**
122 * Set on which buffer there will be done seeking.
123 * @param buffer Used for seeking.
124 */
125 void setCurrentBuffer(ByteBuffer buffer);
126
127 /**
128 * Does a deep copy of the key at the current position. A deep copy is
129 * necessary because buffers are reused in the decoder.
130 * @return key at current position
131 */
132 ByteBuffer getKeyDeepCopy();
133
134 /**
135 * Does a shallow copy of the value at the current position. A shallow
136 * copy is possible because the returned buffer refers to the backing array
137 * of the original encoded buffer.
138 * @return value at current position
139 */
140 ByteBuffer getValueShallowCopy();
141
142 /** @return key value at current position with position set to limit */
143 ByteBuffer getKeyValueBuffer();
144
145 /**
146 * @return the KeyValue object at the current position. Includes memstore
147 * timestamp.
148 */
149 KeyValue getKeyValue();
150
151 /** Set position to beginning of given block */
152 void rewind();
153
154 /**
155 * Move to next position
156 * @return true on success, false if there is no more positions.
157 */
158 boolean next();
159
160 /**
161 * Moves the seeker position within the current block to:
162 * <ul>
163 * <li>the last key that that is less than or equal to the given key if
164 * <code>seekBefore</code> is false</li>
165 * <li>the last key that is strictly less than the given key if <code>
166 * seekBefore</code> is true. The caller is responsible for loading the
167 * previous block if the requested key turns out to be the first key of the
168 * current block.</li>
169 * </ul>
170 * @param key byte array containing the key
171 * @param offset key position the array
172 * @param length key length in bytes
173 * @param seekBefore find the key strictly less than the given key in case
174 * of an exact match. Does not matter in case of an inexact match.
175 * @return 0 on exact match, 1 on inexact match.
176 */
177 int seekToKeyInBlock(
178 byte[] key, int offset, int length, boolean seekBefore
179 );
180
181 /**
182 * Compare the given key against the current key
183 * @param comparator
184 * @param key
185 * @param offset
186 * @param length
187 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
188 */
189 public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
190 }
191 }