Specify credentials for Data Grid user with permission to access the REST endpoint
Set an interval between cross-site status checks
Provide a list of servers
#!/usr/bin/python3
import time
import requests
from requests.auth import HTTPDigestAuth
class InfinispanConnection:
def __init__(self, server: str = 'http://localhost:11222', cache_manager: str = 'default',
auth: tuple = ('admin', 'change_me')) -> None:
super().__init__()
self.__url = f'{server}/rest/v2/container/x-site/backups/'
self.__auth = auth
self.__headers = {
'accept': 'application/json'
}
def get_sites_status(self):
try:
rsp = requests.get(self.__url, headers=self.__headers, auth=HTTPDigestAuth(self.__auth[0], self.__auth[1]))
if rsp.status_code != 200:
return None
return rsp.json()
except:
return None
# Specify credentials for Data Grid user with permission to access the REST endpoint
USERNAME = 'admin'
PASSWORD = 'change_me'
# Set an interval between cross-site status checks
POLL_INTERVAL_SEC = 5
# Provide a list of servers
SERVERS = [
InfinispanConnection('http://127.0.0.1:11222', auth=(USERNAME, PASSWORD)),
InfinispanConnection('http://127.0.0.1:12222', auth=(USERNAME, PASSWORD))
]
#Specify the names of remote sites
REMOTE_SITES = [
'nyc'
]
#Provide a list of caches to monitor
CACHES = [
'work',
'sessions'
]
def on_event(site: str, cache: str, old_status: str, new_status: str):
# TODO implement your handling code here
print(f'site={site} cache={cache} Status changed {old_status} -> {new_status}')
def __handle_mixed_state(state: dict, site: str, site_status: dict):
if site not in state:
state[site] = {c: 'online' if c in site_status['online'] else 'offline' for c in CACHES}
return
for cache in CACHES:
__update_cache_state(state, site, cache, 'online' if cache in site_status['online'] else 'offline')
def __handle_online_or_offline_state(state: dict, site: str, new_status: str):
if site not in state:
state[site] = {c: new_status for c in CACHES}
return
for cache in CACHES:
__update_cache_state(state, site, cache, new_status)
def __update_cache_state(state: dict, site: str, cache: str, new_status: str):
old_status = state[site].get(cache)
if old_status != new_status:
on_event(site, cache, old_status, new_status)
state[site][cache] = new_status
def update_state(state: dict):
rsp = None
for conn in SERVERS:
rsp = conn.get_sites_status()
if rsp:
break
if rsp is None:
print('Unable to fetch site status from any server')
return
for site in REMOTE_SITES:
site_status = rsp.get(site, {})
new_status = site_status.get('status')
if new_status == 'mixed':
__handle_mixed_state(state, site, site_status)
else:
__handle_online_or_offline_state(state, site, new_status)
if __name__ == '__main__':
_state = {}
while True:
update_state(_state)
time.sleep(POLL_INTERVAL_SEC)
Copy to ClipboardCopied!Toggle word wrapToggle overflow
在以下示例中,当 NYC 站点为名为 work 或 session 的缓存 离线时,Prometheus 会发出警告。
groups:
- name: Cross Site Rules
rules:
- alert: Cache Work and Site NYC
expr: vendor_cache_manager_default_cache_work_x_site_admin_nyc_status == 0
- alert: Cache Sessions and Site NYC
expr: vendor_cache_manager_default_cache_sessions_x_site_admin_nyc_status == 0
groups:-name: Cross Site Rules
rules:-alert: Cache Work and Site NYC
expr: vendor_cache_manager_default_cache_work_x_site_admin_nyc_status == 0
-alert: Cache Sessions and Site NYC
expr: vendor_cache_manager_default_cache_sessions_x_site_admin_nyc_status == 0
Copy to ClipboardCopied!Toggle word wrapToggle overflow