Skip to content

Commit

Permalink
Enable suppression of dead servers in Monitor by resource group (apach…
Browse files Browse the repository at this point in the history
…e#5226)

Closes apache#5162


Co-authored-by: Dom G. <[email protected]>
  • Loading branch information
dlmarion and DomGarguilo authored Jan 8, 2025
1 parent 05fe071 commit d2f25d3
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,12 @@ public enum Property {
+ " The resources that are used by default can be seen in"
+ " `accumulo/server/monitor/src/main/resources/templates/default.ftl`.",
"2.0.0"),
MONITOR_DEAD_LIST_RG_EXCLUSIONS("monitor.dead.server.rg.exclusions", "", PropertyType.STRING,
"The Monitor displays information about servers that it believes have died recently."
+ " This property accepts a comma separated list of resource group names. If"
+ " the dead servers resource group matches a resource group in this list,"
+ " then it will be suppressed from the dead servers list in the monitor.",
"4.0.0"),
// per table properties
TABLE_PREFIX("table.", null, PropertyType.PREFIX,
"Properties in this category affect tablet server treatment of tablets,"
Expand Down
4 changes: 4 additions & 0 deletions server/monitor/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@
<groupId>org.apache.accumulo</groupId>
<artifactId>accumulo-start</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-web</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
Expand All @@ -32,6 +33,7 @@
import jakarta.ws.rs.core.MediaType;

import org.apache.accumulo.core.client.admin.servers.ServerId;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.gc.thrift.GCStatus;
import org.apache.accumulo.core.manager.thrift.DeadServer;
import org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo;
Expand All @@ -46,6 +48,7 @@
import org.apache.accumulo.monitor.rest.tservers.ServerShuttingDownInformation;
import org.apache.accumulo.monitor.rest.tservers.ServersShuttingDown;
import org.apache.accumulo.server.manager.state.TabletServerState;
import org.apache.commons.lang3.StringUtils;

/**
* Responsible for generating a new Manager information JSON object
Expand Down Expand Up @@ -173,10 +176,23 @@ public static DeadServerList getDeadTservers(Monitor monitor) {
}

DeadServerList deadServers = new DeadServerList();

String prop = monitor.getConfiguration().get(Property.MONITOR_DEAD_LIST_RG_EXCLUSIONS);
Set<String> exclusions = null;
if (StringUtils.isNotBlank(prop)) {
String[] rgs = prop.split(",");
exclusions = new HashSet<>(rgs.length);
for (String s : rgs) {
exclusions.add(s.trim());
}
}

// Add new dead servers to the list
for (DeadServer dead : mmi.deadTabletServers) {
deadServers
.addDeadServer(new DeadServerInformation(dead.server, dead.lastStatus, dead.status));
if (exclusions == null || !exclusions.contains(dead.getResourceGroup())) {
deadServers
.addDeadServer(new DeadServerInformation(dead.server, dead.lastStatus, dead.status));
}
}
return deadServers;
}
Expand Down

0 comments on commit d2f25d3

Please sign in to comment.