1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.zookeeper;
20
21 import java.util.List;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.HRegionInfo;
28 import org.apache.hadoop.hbase.RegionTransition;
29 import org.apache.hadoop.hbase.ServerName;
30 import org.apache.hadoop.hbase.exceptions.DeserializationException;
31 import org.apache.hadoop.hbase.executor.EventType;
32 import org.apache.zookeeper.AsyncCallback;
33 import org.apache.zookeeper.KeeperException;
34 import org.apache.zookeeper.KeeperException.Code;
35 import org.apache.zookeeper.KeeperException.NoNodeException;
36 import org.apache.zookeeper.KeeperException.NodeExistsException;
37 import org.apache.zookeeper.data.Stat;
38
39 // We should not be importing this Type here, nor a RegionTransition, etc. This class should be
40 // about zk and bytes only.
41
42 /**
43 * Utility class for doing region assignment in ZooKeeper. This class extends
44 * stuff done in {@link ZKUtil} to cover specific assignment operations.
45 * <p>
46 * Contains only static methods and constants.
47 * <p>
48 * Used by both the Master and RegionServer.
49 * <p>
50 * All valid transitions outlined below:
51 * <p>
52 * <b>MASTER</b>
53 * <ol>
54 * <li>
55 * Master creates an unassigned node as OFFLINE.
56 * - Cluster startup and table enabling.
57 * </li>
58 * <li>
59 * Master forces an existing unassigned node to OFFLINE.
60 * - RegionServer failure.
61 * - Allows transitions from all states to OFFLINE.
62 * </li>
63 * <li>
 * Master deletes an unassigned node that was in an OPENED state.
 * - Normal region transitions. Besides cluster startup, no other deletions
 * of unassigned nodes are allowed.
67 * </li>
68 * <li>
69 * Master deletes all unassigned nodes regardless of state.
70 * - Cluster startup before any assignment happens.
71 * </li>
72 * </ol>
73 * <p>
74 * <b>REGIONSERVER</b>
75 * <ol>
76 * <li>
77 * RegionServer creates an unassigned node as CLOSING.
78 * - All region closes will do this in response to a CLOSE RPC from Master.
79 * - A node can never be transitioned to CLOSING, only created.
80 * </li>
81 * <li>
82 * RegionServer transitions an unassigned node from CLOSING to CLOSED.
83 * - Normal region closes. CAS operation.
84 * </li>
85 * <li>
86 * RegionServer transitions an unassigned node from OFFLINE to OPENING.
87 * - All region opens will do this in response to an OPEN RPC from the Master.
88 * - Normal region opens. CAS operation.
89 * </li>
90 * <li>
91 * RegionServer transitions an unassigned node from OPENING to OPENED.
92 * - Normal region opens. CAS operation.
93 * </li>
94 * </ol>
95 */
96 @InterfaceAudience.Private
97 public class ZKAssign {
98 private static final Log LOG = LogFactory.getLog(ZKAssign.class);
99
100 /**
101 * Gets the full path node name for the unassigned node for the specified
102 * region.
103 * @param zkw zk reference
104 * @param regionName region name
105 * @return full path node name
106 */
107 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
108 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
109 }
110
111 /**
112 * Gets the region name from the full path node name of an unassigned node.
113 * @param path full zk path
114 * @return region name
115 */
116 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
117 return path.substring(zkw.assignmentZNode.length()+1);
118 }
119
120 // Master methods
121
122 /**
123 * Creates a new unassigned node in the OFFLINE state for the specified region.
124 *
125 * <p>Does not transition nodes from other states. If a node already exists
126 * for this region, a {@link NodeExistsException} will be thrown.
127 *
128 * <p>Sets a watcher on the unassigned region node if the method is successful.
129 *
130 * <p>This method should only be used during cluster startup and the enabling
131 * of a table.
132 *
133 * @param zkw zk reference
134 * @param region region to be created as offline
135 * @param serverName server transition will happen on
136 * @throws KeeperException if unexpected zookeeper exception
137 * @throws KeeperException.NodeExistsException if node already exists
138 */
139 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
140 ServerName serverName)
141 throws KeeperException, KeeperException.NodeExistsException {
142 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
143 }
144
145 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
146 ServerName serverName, final EventType event)
147 throws KeeperException, KeeperException.NodeExistsException {
148 LOG.debug(zkw.prefix("Creating unassigned node " +
149 region.getEncodedName() + " in OFFLINE state"));
150 RegionTransition rt =
151 RegionTransition.createRegionTransition(event, region.getRegionName(), serverName);
152 String node = getNodeName(zkw, region.getEncodedName());
153 ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
154 }
155
156 /**
157 * Creates an unassigned node in the OFFLINE state for the specified region.
158 * <p>
159 * Runs asynchronously. Depends on no pre-existing znode.
160 *
161 * <p>Sets a watcher on the unassigned region node.
162 *
163 * @param zkw zk reference
164 * @param region region to be created as offline
165 * @param serverName server transition will happen on
166 * @param cb
167 * @param ctx
168 * @throws KeeperException if unexpected zookeeper exception
169 * @throws KeeperException.NodeExistsException if node already exists
170 */
171 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
172 HRegionInfo region, ServerName serverName,
173 final AsyncCallback.StringCallback cb, final Object ctx)
174 throws KeeperException {
175 LOG.debug(zkw.prefix("Async create of unassigned node " +
176 region.getEncodedName() + " with OFFLINE state"));
177 RegionTransition rt =
178 RegionTransition.createRegionTransition(
179 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
180 String node = getNodeName(zkw, region.getEncodedName());
181 ZKUtil.asyncCreate(zkw, node, rt.toByteArray(), cb, ctx);
182 }
183
184 /**
185 * Creates or force updates an unassigned node to the OFFLINE state for the
186 * specified region.
187 * <p>
188 * Attempts to create the node but if it exists will force it to transition to
189 * and OFFLINE state.
190 *
191 * <p>Sets a watcher on the unassigned region node if the method is
192 * successful.
193 *
194 * <p>This method should be used when assigning a region.
195 *
196 * @param zkw zk reference
197 * @param region region to be created as offline
198 * @param serverName server transition will happen on
199 * @return the version of the znode created in OFFLINE state, -1 if
200 * unsuccessful.
201 * @throws KeeperException if unexpected zookeeper exception
202 * @throws KeeperException.NodeExistsException if node already exists
203 */
204 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
205 HRegionInfo region, ServerName serverName) throws KeeperException {
206 LOG.debug(zkw.prefix("Creating (or updating) unassigned node " +
207 region.getEncodedName() + " with OFFLINE state"));
208 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_OFFLINE,
209 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
210 byte [] data = rt.toByteArray();
211 String node = getNodeName(zkw, region.getEncodedName());
212 zkw.sync(node);
213 int version = ZKUtil.checkExists(zkw, node);
214 if (version == -1) {
215 return ZKUtil.createAndWatch(zkw, node, data);
216 } else {
217 boolean setData = false;
218 try {
219 setData = ZKUtil.setData(zkw, node, data, version);
220 // Setdata throws KeeperException which aborts the Master. So we are
221 // catching it here.
222 // If just before setting the znode to OFFLINE if the RS has made any
223 // change to the
224 // znode state then we need to return -1.
225 } catch (KeeperException kpe) {
226 LOG.info("Version mismatch while setting the node to OFFLINE state.");
227 return -1;
228 }
229 if (!setData) {
230 return -1;
231 } else {
232 // We successfully forced to OFFLINE, reset watch and handle if
233 // the state changed in between our set and the watch
234 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
235 rt = getRegionTransition(bytes);
236 if (rt.getEventType() != EventType.M_ZK_REGION_OFFLINE) {
237 // state changed, need to process
238 return -1;
239 }
240 }
241 }
242 return version + 1;
243 }
244
245 /**
246 * Deletes an existing unassigned node that is in the OPENED state for the
247 * specified region.
248 *
249 * <p>If a node does not already exist for this region, a
250 * {@link NoNodeException} will be thrown.
251 *
252 * <p>No watcher is set whether this succeeds or not.
253 *
254 * <p>Returns false if the node was not in the proper state but did exist.
255 *
256 * <p>This method is used during normal region transitions when a region
257 * finishes successfully opening. This is the Master acknowledging completion
258 * of the specified regions transition.
259 *
260 * @param zkw zk reference
261 * @param encodedRegionName opened region to be deleted from zk
262 * @param sn the expected region transition target server name
263 * @throws KeeperException if unexpected zookeeper exception
264 * @throws KeeperException.NoNodeException if node does not exist
265 */
266 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
267 String encodedRegionName, ServerName sn)
268 throws KeeperException, KeeperException.NoNodeException {
269 return deleteNode(zkw, encodedRegionName,
270 EventType.RS_ZK_REGION_OPENED, sn);
271 }
272
273 /**
274 * Deletes an existing unassigned node that is in the OFFLINE state for the
275 * specified region.
276 *
277 * <p>If a node does not already exist for this region, a
278 * {@link NoNodeException} will be thrown.
279 *
280 * <p>No watcher is set whether this succeeds or not.
281 *
282 * <p>Returns false if the node was not in the proper state but did exist.
283 *
284 * <p>This method is used during master failover when the regions on an RS
285 * that has died are all set to OFFLINE before being processed.
286 *
287 * @param zkw zk reference
288 * @param encodedRegionName closed region to be deleted from zk
289 * @param sn the expected region transition target server name
290 * @throws KeeperException if unexpected zookeeper exception
291 * @throws KeeperException.NoNodeException if node does not exist
292 */
293 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
294 String encodedRegionName, ServerName sn)
295 throws KeeperException, KeeperException.NoNodeException {
296 return deleteNode(zkw, encodedRegionName,
297 EventType.M_ZK_REGION_OFFLINE, sn);
298 }
299
300 /**
301 * Deletes an existing unassigned node that is in the CLOSED state for the
302 * specified region.
303 *
304 * <p>If a node does not already exist for this region, a
305 * {@link NoNodeException} will be thrown.
306 *
307 * <p>No watcher is set whether this succeeds or not.
308 *
309 * <p>Returns false if the node was not in the proper state but did exist.
310 *
311 * <p>This method is used during table disables when a region finishes
312 * successfully closing. This is the Master acknowledging completion
313 * of the specified regions transition to being closed.
314 *
315 * @param zkw zk reference
316 * @param encodedRegionName closed region to be deleted from zk
317 * @param sn the expected region transition target server name
318 * @throws KeeperException if unexpected zookeeper exception
319 * @throws KeeperException.NoNodeException if node does not exist
320 */
321 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
322 String encodedRegionName, ServerName sn)
323 throws KeeperException, KeeperException.NoNodeException {
324 return deleteNode(zkw, encodedRegionName,
325 EventType.RS_ZK_REGION_CLOSED, sn);
326 }
327
328 /**
329 * Deletes an existing unassigned node that is in the CLOSING state for the
330 * specified region.
331 *
332 * <p>If a node does not already exist for this region, a
333 * {@link NoNodeException} will be thrown.
334 *
335 * <p>No watcher is set whether this succeeds or not.
336 *
337 * <p>Returns false if the node was not in the proper state but did exist.
338 *
339 * <p>This method is used during table disables when a region finishes
340 * successfully closing. This is the Master acknowledging completion
341 * of the specified regions transition to being closed.
342 *
343 * @param zkw zk reference
344 * @param region closing region to be deleted from zk
345 * @param sn the expected region transition target server name
346 * @throws KeeperException if unexpected zookeeper exception
347 * @throws KeeperException.NoNodeException if node does not exist
348 */
349 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
350 HRegionInfo region, ServerName sn)
351 throws KeeperException, KeeperException.NoNodeException {
352 String encodedRegionName = region.getEncodedName();
353 return deleteNode(zkw, encodedRegionName,
354 EventType.M_ZK_REGION_CLOSING, sn);
355 }
356
357 /**
358 * Deletes an existing unassigned node that is in the specified state for the
359 * specified region.
360 *
361 * <p>If a node does not already exist for this region, a
362 * {@link NoNodeException} will be thrown.
363 *
364 * <p>No watcher is set whether this succeeds or not.
365 *
366 * <p>Returns false if the node was not in the proper state but did exist.
367 *
368 * <p>This method is used when a region finishes opening/closing.
369 * The Master acknowledges completion
370 * of the specified regions transition to being closed/opened.
371 *
372 * @param zkw zk reference
373 * @param encodedRegionName region to be deleted from zk
374 * @param expectedState state region must be in for delete to complete
375 * @param sn the expected region transition target server name
376 * @throws KeeperException if unexpected zookeeper exception
377 * @throws KeeperException.NoNodeException if node does not exist
378 */
379 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
380 EventType expectedState, ServerName sn)
381 throws KeeperException, KeeperException.NoNodeException {
382 return deleteNode(zkw, encodedRegionName, expectedState, sn, -1);
383 }
384
385 /**
386 * Deletes an existing unassigned node that is in the specified state for the
387 * specified region.
388 *
389 * <p>If a node does not already exist for this region, a
390 * {@link NoNodeException} will be thrown.
391 *
392 * <p>No watcher is set whether this succeeds or not.
393 *
394 * <p>Returns false if the node was not in the proper state but did exist.
395 *
396 * <p>This method is used when a region finishes opening/closing.
397 * The Master acknowledges completion
398 * of the specified regions transition to being closed/opened.
399 *
400 * @param zkw zk reference
401 * @param encodedRegionName region to be deleted from zk
402 * @param expectedState state region must be in for delete to complete
403 * @param expectedVersion of the znode that is to be deleted.
404 * If expectedVersion need not be compared while deleting the znode
405 * pass -1
406 * @throws KeeperException if unexpected zookeeper exception
407 * @throws KeeperException.NoNodeException if node does not exist
408 */
409 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
410 EventType expectedState, int expectedVersion)
411 throws KeeperException, KeeperException.NoNodeException {
412 return deleteNode(zkw, encodedRegionName, expectedState, null, expectedVersion);
413 }
414
415 /**
416 * Deletes an existing unassigned node that is in the specified state for the
417 * specified region.
418 *
419 * <p>If a node does not already exist for this region, a
420 * {@link NoNodeException} will be thrown.
421 *
422 * <p>No watcher is set whether this succeeds or not.
423 *
424 * <p>Returns false if the node was not in the proper state but did exist.
425 *
426 * <p>This method is used when a region finishes opening/closing.
427 * The Master acknowledges completion
428 * of the specified regions transition to being closed/opened.
429 *
430 * @param zkw zk reference
431 * @param encodedRegionName region to be deleted from zk
432 * @param expectedState state region must be in for delete to complete
433 * @param serverName the expected region transition target server name
434 * @param expectedVersion of the znode that is to be deleted.
435 * If expectedVersion need not be compared while deleting the znode
436 * pass -1
437 * @throws KeeperException if unexpected zookeeper exception
438 * @throws KeeperException.NoNodeException if node does not exist
439 */
440 public static boolean deleteNode(ZooKeeperWatcher zkw, String encodedRegionName,
441 EventType expectedState, ServerName serverName, int expectedVersion)
442 throws KeeperException, KeeperException.NoNodeException {
443 if (LOG.isTraceEnabled()) {
444 LOG.trace(zkw.prefix("Deleting existing unassigned " +
445 "node " + encodedRegionName + " in expected state " + expectedState));
446 }
447 String node = getNodeName(zkw, encodedRegionName);
448 zkw.sync(node);
449 Stat stat = new Stat();
450 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
451 if (bytes == null) {
452 // If it came back null, node does not exist.
453 throw KeeperException.create(Code.NONODE);
454 }
455 RegionTransition rt = getRegionTransition(bytes);
456 EventType et = rt.getEventType();
457 if (!et.equals(expectedState)) {
458 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName + " in " +
459 expectedState + " state but node is in " + et + " state"));
460 return false;
461 }
462 // Verify the server transition happens on is not changed
463 if (serverName != null && !rt.getServerName().equals(serverName)) {
464 LOG.warn(zkw.prefix("Attempting to delete unassigned node " + encodedRegionName
465 + " with target " + serverName + " but node has " + rt.getServerName()));
466 return false;
467 }
468 if (expectedVersion != -1
469 && stat.getVersion() != expectedVersion) {
470 LOG.warn("The node " + encodedRegionName + " we are trying to delete is not" +
471 " the expected one. Got a version mismatch");
472 return false;
473 }
474 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
475 LOG.warn(zkw.prefix("Attempting to delete " +
476 "unassigned node " + encodedRegionName + " in " + expectedState +
477 " state but after verifying state, we got a version mismatch"));
478 return false;
479 }
480 LOG.debug(zkw.prefix("Deleted unassigned node " +
481 encodedRegionName + " in expected state " + expectedState));
482 return true;
483 }
484
485 /**
486 * Deletes all unassigned nodes regardless of their state.
487 *
488 * <p>No watchers are set.
489 *
490 * <p>This method is used by the Master during cluster startup to clear out
491 * any existing state from other cluster runs.
492 *
493 * @param zkw zk reference
494 * @throws KeeperException if unexpected zookeeper exception
495 */
496 public static void deleteAllNodes(ZooKeeperWatcher zkw)
497 throws KeeperException {
498 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
499 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
500 }
501
502 /**
503 * Creates a new unassigned node in the CLOSING state for the specified
504 * region.
505 *
506 * <p>Does not transition nodes from any states. If a node already exists
507 * for this region, a {@link NodeExistsException} will be thrown.
508 *
509 * <p>If creation is successful, returns the version number of the CLOSING
510 * node created.
511 *
512 * <p>Set a watch.
513 *
514 * <p>This method should only be used by a Master when initiating a
515 * close of a region before sending a close request to the region server.
516 *
517 * @param zkw zk reference
518 * @param region region to be created as closing
519 * @param serverName server transition will happen on
520 * @return version of node after transition, -1 if unsuccessful transition
521 * @throws KeeperException if unexpected zookeeper exception
522 * @throws KeeperException.NodeExistsException if node already exists
523 */
524 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
525 ServerName serverName)
526 throws KeeperException, KeeperException.NodeExistsException {
527 LOG.debug(zkw.prefix("Creating unassigned node " +
528 region.getEncodedName() + " in a CLOSING state"));
529 RegionTransition rt = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING,
530 region.getRegionName(), serverName, HConstants.EMPTY_BYTE_ARRAY);
531 String node = getNodeName(zkw, region.getEncodedName());
532 return ZKUtil.createAndWatch(zkw, node, rt.toByteArray());
533 }
534
535 // RegionServer methods
536
537 /**
538 * Transitions an existing unassigned node for the specified region which is
539 * currently in the CLOSING state to be in the CLOSED state.
540 *
541 * <p>Does not transition nodes from other states. If for some reason the
542 * node could not be transitioned, the method returns -1. If the transition
543 * is successful, the version of the node after transition is returned.
544 *
545 * <p>This method can fail and return false for three different reasons:
546 * <ul><li>Unassigned node for this region does not exist</li>
547 * <li>Unassigned node for this region is not in CLOSING state</li>
548 * <li>After verifying CLOSING state, update fails because of wrong version
549 * (someone else already transitioned the node)</li>
550 * </ul>
551 *
552 * <p>Does not set any watches.
553 *
554 * <p>This method should only be used by a RegionServer when initiating a
555 * close of a region after receiving a CLOSE RPC from the Master.
556 *
557 * @param zkw zk reference
558 * @param region region to be transitioned to closed
559 * @param serverName server transition happens on
560 * @return version of node after transition, -1 if unsuccessful transition
561 * @throws KeeperException if unexpected zookeeper exception
562 */
563 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
564 HRegionInfo region, ServerName serverName, int expectedVersion)
565 throws KeeperException {
566 return transitionNode(zkw, region, serverName,
567 EventType.M_ZK_REGION_CLOSING,
568 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
569 }
570
571 /**
572 * Transitions an existing unassigned node for the specified region which is
573 * currently in the OFFLINE state to be in the OPENING state.
574 *
575 * <p>Does not transition nodes from other states. If for some reason the
576 * node could not be transitioned, the method returns -1. If the transition
577 * is successful, the version of the node written as OPENING is returned.
578 *
579 * <p>This method can fail and return -1 for three different reasons:
580 * <ul><li>Unassigned node for this region does not exist</li>
581 * <li>Unassigned node for this region is not in OFFLINE state</li>
582 * <li>After verifying OFFLINE state, update fails because of wrong version
583 * (someone else already transitioned the node)</li>
584 * </ul>
585 *
586 * <p>Does not set any watches.
587 *
588 * <p>This method should only be used by a RegionServer when initiating an
589 * open of a region after receiving an OPEN RPC from the Master.
590 *
591 * @param zkw zk reference
592 * @param region region to be transitioned to opening
593 * @param serverName server transition happens on
594 * @return version of node after transition, -1 if unsuccessful transition
595 * @throws KeeperException if unexpected zookeeper exception
596 */
597 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
598 HRegionInfo region, ServerName serverName)
599 throws KeeperException {
600 return transitionNodeOpening(zkw, region, serverName,
601 EventType.M_ZK_REGION_OFFLINE);
602 }
603
604 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
605 HRegionInfo region, ServerName serverName, final EventType beginState)
606 throws KeeperException {
607 return transitionNode(zkw, region, serverName, beginState,
608 EventType.RS_ZK_REGION_OPENING, -1);
609 }
610
611 /**
612 * Retransitions an existing unassigned node for the specified region which is
613 * currently in the OPENING state to be in the OPENING state.
614 *
615 * <p>Does not transition nodes from other states. If for some reason the
616 * node could not be transitioned, the method returns -1. If the transition
617 * is successful, the version of the node rewritten as OPENING is returned.
618 *
619 * <p>This method can fail and return -1 for three different reasons:
620 * <ul><li>Unassigned node for this region does not exist</li>
621 * <li>Unassigned node for this region is not in OPENING state</li>
622 * <li>After verifying OPENING state, update fails because of wrong version
623 * (someone else already transitioned the node)</li>
624 * </ul>
625 *
626 * <p>Does not set any watches.
627 *
628 * <p>This method should only be used by a RegionServer when initiating an
629 * open of a region after receiving an OPEN RPC from the Master.
630 *
631 * @param zkw zk reference
632 * @param region region to be transitioned to opening
633 * @param serverName server transition happens on
634 * @param updateZNode write the znode. If false, we only check.
635 * @return version of node after transition, -1 if unsuccessful transition
636 * @throws KeeperException if unexpected zookeeper exception
637 */
638 public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
639 HRegionInfo region, ServerName serverName, int expectedVersion, boolean updateZNode)
640 throws KeeperException {
641
642 String encoded = region.getEncodedName();
643 if(LOG.isDebugEnabled()) {
644 LOG.debug(zkw.prefix("Attempting to retransition opening state of node " +
645 HRegionInfo.prettyPrint(encoded)));
646 }
647
648 String node = getNodeName(zkw, encoded);
649 zkw.sync(node);
650
651 // Read existing data of the node
652 Stat stat = new Stat();
653 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
654 if (existingBytes == null) {
655 // Node no longer exists. Return -1. It means unsuccessful transition.
656 return -1;
657 }
658 RegionTransition rt = getRegionTransition(existingBytes);
659
660 // Verify it is the expected version
661 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
662 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
663 "unassigned node for " + encoded + " failed, " +
664 "the node existed but was version " + stat.getVersion() +
665 " not the expected version " + expectedVersion));
666 return -1;
667 }
668
669 // Verify it is in expected state
670 EventType et = rt.getEventType();
671 if (!et.equals(EventType.RS_ZK_REGION_OPENING)) {
672 String existingServer = (rt.getServerName() == null)
673 ? "<unknown>" : rt.getServerName().toString();
674 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the unassigned node for "
675 + encoded + " failed, the node existed but was in the state " + et +
676 " set by the server " + existingServer));
677 return -1;
678 }
679
680 // We don't have to write the new state: the check is complete.
681 if (!updateZNode){
682 return expectedVersion;
683 }
684
685 // Write new data, ensuring data has not changed since we last read it
686 try {
687 rt = RegionTransition.createRegionTransition(
688 EventType.RS_ZK_REGION_OPENING, region.getRegionName(), serverName, null);
689 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
690 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
691 "unassigned node for " + encoded + " failed, " +
692 "the node existed and was in the expected state but then when " +
693 "setting data we got a version mismatch"));
694 return -1;
695 }
696 if(LOG.isDebugEnabled()) {
697 LOG.debug(zkw.prefix("Retransition opening state of node " + encoded));
698 }
699 return stat.getVersion() + 1;
700 } catch (KeeperException.NoNodeException nne) {
701 LOG.warn(zkw.prefix("Attempt to retransition the opening state of the " +
702 "unassigned node for " + encoded + " failed, " +
703 "the node existed and was in the expected state but then when " +
704 "setting data it no longer existed"));
705 return -1;
706 }
707 }
708
709 /**
710 * Transitions an existing unassigned node for the specified region which is
711 * currently in the OPENING state to be in the OPENED state.
712 *
713 * <p>Does not transition nodes from other states. If for some reason the
714 * node could not be transitioned, the method returns -1. If the transition
715 * is successful, the version of the node after transition is returned.
716 *
717 * <p>This method can fail and return false for three different reasons:
718 * <ul><li>Unassigned node for this region does not exist</li>
719 * <li>Unassigned node for this region is not in OPENING state</li>
720 * <li>After verifying OPENING state, update fails because of wrong version
721 * (this should never actually happen since an RS only does this transition
722 * following a transition to OPENING. if two RS are conflicting, one would
723 * fail the original transition to OPENING and not this transition)</li>
724 * </ul>
725 *
726 * <p>Does not set any watches.
727 *
728 * <p>This method should only be used by a RegionServer when completing the
729 * open of a region.
730 *
731 * @param zkw zk reference
732 * @param region region to be transitioned to opened
733 * @param serverName server transition happens on
734 * @return version of node after transition, -1 if unsuccessful transition
735 * @throws KeeperException if unexpected zookeeper exception
736 */
737 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
738 HRegionInfo region, ServerName serverName, int expectedVersion)
739 throws KeeperException {
740 return transitionNode(zkw, region, serverName,
741 EventType.RS_ZK_REGION_OPENING,
742 EventType.RS_ZK_REGION_OPENED, expectedVersion);
743 }
744
745 /**
746 *
747 * @param zkw zk reference
748 * @param region region to be closed
749 * @param expectedVersion expected version of the znode
750 * @return true if the znode exists, has the right version and the right state. False otherwise.
751 * @throws KeeperException
752 */
753 public static boolean checkClosingState(ZooKeeperWatcher zkw, HRegionInfo region,
754 int expectedVersion) throws KeeperException {
755
756 final String encoded = getNodeName(zkw, region.getEncodedName());
757 zkw.sync(encoded);
758
759 // Read existing data of the node
760 Stat stat = new Stat();
761 byte[] existingBytes = ZKUtil.getDataNoWatch(zkw, encoded, stat);
762
763 if (existingBytes == null) {
764 LOG.warn(zkw.prefix("Attempt to check the " +
765 "closing node for " + encoded +
766 ". The node does not exist"));
767 return false;
768 }
769
770 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
771 LOG.warn(zkw.prefix("Attempt to check the " +
772 "closing node for " + encoded +
773 ". The node existed but was version " + stat.getVersion() +
774 " not the expected version " + expectedVersion));
775 return false;
776 }
777
778 RegionTransition rt = getRegionTransition(existingBytes);
779
780 if (!EventType.M_ZK_REGION_CLOSING.equals(rt.getEventType())) {
781 LOG.warn(zkw.prefix("Attempt to check the " +
782 "closing node for " + encoded +
783 ". The node existed but was in an unexpected state: " + rt.getEventType()));
784 return false;
785 }
786
787 return true;
788 }
789
790 /**
791 * Method that actually performs unassigned node transitions.
792 *
793 * <p>Attempts to transition the unassigned node for the specified region
794 * from the expected state to the state in the specified transition data.
795 *
796 * <p>Method first reads existing data and verifies it is in the expected
797 * state. If the node does not exist or the node is not in the expected
798 * state, the method returns -1. If the transition is successful, the
799 * version number of the node following the transition is returned.
800 *
801 * <p>If the read state is what is expected, it attempts to write the new
802 * state and data into the node. When doing this, it includes the expected
803 * version (determined when the existing state was verified) to ensure that
804 * only one transition is successful. If there is a version mismatch, the
805 * method returns -1.
806 *
807 * <p>If the write is successful, no watch is set and the method returns true.
808 *
809 * @param zkw zk reference
810 * @param region region to be transitioned to opened
811 * @param serverName server transition happens on
812 * @param endState state to transition node to if all checks pass
813 * @param beginState state the node must currently be in to do transition
814 * @param expectedVersion expected version of data before modification, or -1
815 * @return version of node after transition, -1 if unsuccessful transition
816 * @throws KeeperException if unexpected zookeeper exception
817 */
818 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
819 ServerName serverName, EventType beginState, EventType endState,
820 int expectedVersion)
821 throws KeeperException {
822 return transitionNode(zkw, region, serverName, beginState, endState, expectedVersion, null);
823 }
824
825
826 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
827 ServerName serverName, EventType beginState, EventType endState,
828 int expectedVersion, final byte [] payload)
829 throws KeeperException {
830 String encoded = region.getEncodedName();
831 if(LOG.isDebugEnabled()) {
832 LOG.debug(zkw.prefix("Transitioning " + HRegionInfo.prettyPrint(encoded) +
833 " from " + beginState.toString() + " to " + endState.toString()));
834 }
835
836 String node = getNodeName(zkw, encoded);
837 zkw.sync(node);
838
839 // Read existing data of the node
840 Stat stat = new Stat();
841 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
842 if (existingBytes == null) {
843 // Node no longer exists. Return -1. It means unsuccessful transition.
844 return -1;
845 }
846
847 // Verify it is the expected version
848 if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
849 LOG.warn(zkw.prefix("Attempt to transition the " +
850 "unassigned node for " + encoded +
851 " from " + beginState + " to " + endState + " failed, " +
852 "the node existed but was version " + stat.getVersion() +
853 " not the expected version " + expectedVersion));
854 return -1;
855 }
856
857 if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
858 && endState.equals(EventType.RS_ZK_REGION_OPENING)
859 && expectedVersion == -1 && stat.getVersion() != 0) {
860 // the below check ensures that double assignment doesnot happen.
861 // When the node is created for the first time then the expected version
862 // that is passed will be -1 and the version in znode will be 0.
863 // In all other cases the version in znode will be > 0.
864 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
865 + encoded + " from " + beginState + " to " + endState + " failed, "
866 + "the node existed but was version " + stat.getVersion()
867 + " not the expected version " + expectedVersion));
868 return -1;
869 }
870
871 RegionTransition rt = getRegionTransition(existingBytes);
872
873 // Verify the server transition happens on is not changed
874 if (!rt.getServerName().equals(serverName)) {
875 LOG.warn(zkw.prefix("Attempt to transition the " +
876 "unassigned node for " + encoded +
877 " from " + beginState + " to " + endState + " failed, " +
878 "the server that tried to transition was " + serverName +
879 " not the expected " + rt.getServerName()));
880 return -1;
881 }
882
883 // Verify it is in expected state
884 EventType et = rt.getEventType();
885 if (!et.equals(beginState)) {
886 String existingServer = (rt.getServerName() == null)
887 ? "<unknown>" : rt.getServerName().toString();
888 LOG.warn(zkw.prefix("Attempt to transition the unassigned node for " + encoded
889 + " from " + beginState + " to " + endState + " failed, the node existed but"
890 + " was in the state " + et + " set by the server " + existingServer));
891 return -1;
892 }
893
894 // Write new data, ensuring data has not changed since we last read it
895 try {
896 rt = RegionTransition.createRegionTransition(
897 endState, region.getRegionName(), serverName, payload);
898 if(!ZKUtil.setData(zkw, node, rt.toByteArray(), stat.getVersion())) {
899 LOG.warn(zkw.prefix("Attempt to transition the " +
900 "unassigned node for " + encoded +
901 " from " + beginState + " to " + endState + " failed, " +
902 "the node existed and was in the expected state but then when " +
903 "setting data we got a version mismatch"));
904 return -1;
905 }
906 if(LOG.isDebugEnabled()) {
907 LOG.debug(zkw.prefix("Transitioned node " + encoded +
908 " from " + beginState + " to " + endState));
909 }
910 return stat.getVersion() + 1;
911 } catch (KeeperException.NoNodeException nne) {
912 LOG.warn(zkw.prefix("Attempt to transition the " +
913 "unassigned node for " + encoded +
914 " from " + beginState + " to " + endState + " failed, " +
915 "the node existed and was in the expected state but then when " +
916 "setting data it no longer existed"));
917 return -1;
918 }
919 }
920
921 private static RegionTransition getRegionTransition(final byte [] bytes) throws KeeperException {
922 try {
923 return RegionTransition.parseFrom(bytes);
924 } catch (DeserializationException e) {
925 // Convert to a zk exception for now. Otherwise have to change API
926 throw ZKUtil.convert(e);
927 }
928 }
929
930 /**
931 * Gets the current data in the unassigned node for the specified region name
932 * or fully-qualified path.
933 *
934 * <p>Returns null if the region does not currently have a node.
935 *
936 * <p>Sets a watch on the node if the node exists.
937 *
938 * @param zkw zk reference
939 * @param pathOrRegionName fully-specified path or region name
940 * @return znode content
941 * @throws KeeperException if unexpected zookeeper exception
942 */
943 public static byte [] getData(ZooKeeperWatcher zkw,
944 String pathOrRegionName)
945 throws KeeperException {
946 String node = getPath(zkw, pathOrRegionName);
947 return ZKUtil.getDataAndWatch(zkw, node);
948 }
949
950 /**
951 * Gets the current data in the unassigned node for the specified region name
952 * or fully-qualified path.
953 *
954 * <p>Returns null if the region does not currently have a node.
955 *
956 * <p>Sets a watch on the node if the node exists.
957 *
958 * @param zkw zk reference
959 * @param pathOrRegionName fully-specified path or region name
960 * @param stat object to populate the version.
961 * @return znode content
962 * @throws KeeperException if unexpected zookeeper exception
963 */
964 public static byte [] getDataAndWatch(ZooKeeperWatcher zkw,
965 String pathOrRegionName, Stat stat)
966 throws KeeperException {
967 String node = getPath(zkw, pathOrRegionName);
968 return ZKUtil.getDataAndWatch(zkw, node, stat);
969 }
970
971 /**
972 * Gets the current data in the unassigned node for the specified region name
973 * or fully-qualified path.
974 *
975 * <p>Returns null if the region does not currently have a node.
976 *
977 * <p>Does not set a watch.
978 *
979 * @param zkw zk reference
980 * @param pathOrRegionName fully-specified path or region name
981 * @param stat object to store node info into on getData call
982 * @return znode content
983 * @throws KeeperException if unexpected zookeeper exception
984 */
985 public static byte [] getDataNoWatch(ZooKeeperWatcher zkw,
986 String pathOrRegionName, Stat stat)
987 throws KeeperException {
988 String node = getPath(zkw, pathOrRegionName);
989 return ZKUtil.getDataNoWatch(zkw, node, stat);
990 }
991
992 /**
993 * @param zkw
994 * @param pathOrRegionName
995 * @return Path to znode
996 */
997 public static String getPath(final ZooKeeperWatcher zkw, final String pathOrRegionName) {
998 return pathOrRegionName.startsWith("/")? pathOrRegionName : getNodeName(zkw, pathOrRegionName);
999 }
1000
1001 /**
1002 * Get the version of the specified znode
1003 * @param zkw zk reference
1004 * @param region region's info
1005 * @return the version of the znode, -1 if it doesn't exist
1006 * @throws KeeperException
1007 */
1008 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
1009 throws KeeperException {
1010 String znode = getNodeName(zkw, region.getEncodedName());
1011 return ZKUtil.checkExists(zkw, znode);
1012 }
1013
1014 /**
1015 * Delete the assignment node regardless of its current state.
1016 * <p>
1017 * Fail silent even if the node does not exist at all.
1018 * @param watcher
1019 * @param regionInfo
1020 * @throws KeeperException
1021 */
1022 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
1023 HRegionInfo regionInfo)
1024 throws KeeperException {
1025 String node = getNodeName(watcher, regionInfo.getEncodedName());
1026 ZKUtil.deleteNodeFailSilent(watcher, node);
1027 }
1028
1029 /**
1030 * Blocks until there are no node in regions in transition.
1031 * <p>
1032 * Used in testing only.
1033 * @param zkw zk reference
1034 * @throws KeeperException
1035 * @throws InterruptedException
1036 */
1037 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
1038 throws KeeperException, InterruptedException {
1039 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1040 List<String> znodes =
1041 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1042 if (znodes != null && !znodes.isEmpty()) {
1043 LOG.debug("Waiting on RIT: " + znodes);
1044 }
1045 Thread.sleep(100);
1046 }
1047 }
1048
1049 /**
1050 * Blocks until there is at least one node in regions in transition.
1051 * <p>
1052 * Used in testing only.
1053 * @param zkw zk reference
1054 * @throws KeeperException
1055 * @throws InterruptedException
1056 */
1057 public static void blockUntilRIT(ZooKeeperWatcher zkw)
1058 throws KeeperException, InterruptedException {
1059 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
1060 List<String> znodes =
1061 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
1062 if (znodes == null || znodes.isEmpty()) {
1063 LOG.debug("No RIT in ZK");
1064 }
1065 Thread.sleep(100);
1066 }
1067 }
1068
1069 /**
1070 * Presume bytes are serialized unassigned data structure
1071 * @param znodeBytes
1072 * @return String of the deserialized znode bytes.
1073 */
1074 static String toString(final byte[] znodeBytes) {
1075 // This method should not exist. Used by ZKUtil stringifying RegionTransition. Have the
1076 // method in here so RegionTransition does not leak into ZKUtil.
1077 try {
1078 RegionTransition rt = RegionTransition.parseFrom(znodeBytes);
1079 return rt.toString();
1080 } catch (DeserializationException e) {
1081 return "";
1082 }
1083 }
1084 }