diff --git a/ansible/requirements.txt b/ansible/requirements.txt index f0c7be4b..f0111ae3 100644 --- a/ansible/requirements.txt +++ b/ansible/requirements.txt @@ -1,3 +1,3 @@ ansible==12.0.0 ansible-lint==25.9.1 -pip==25.2 +pip==25.3 diff --git a/images/mariadb-galera/Dockerfile b/images/mariadb-galera/Dockerfile index 56ddf01a..8ff74cca 100644 --- a/images/mariadb-galera/Dockerfile +++ b/images/mariadb-galera/Dockerfile @@ -1,8 +1,8 @@ -FROM python:3.8.10-slim-buster -MAINTAINER Rich Braun "docker@instantlinux.net" +FROM mariadb:12.0.2 ARG BUILD_DATE ARG VCS_REF -LABEL org.label-schema.build-date=$BUILD_DATE \ +LABEL org.opencontainers.image.authors="Rich Braun docker@instantlinux.net" \ + org.label-schema.build-date=$BUILD_DATE \ org.label-schema.license=GPL-2.0 \ org.label-schema.name=mariadb-galera \ org.label-schema.vcs-ref=$VCS_REF \ @@ -12,41 +12,26 @@ ENV DEBIAN_FRONTEND=noninteractive \ CLUSTER_NAME=cluster01 \ CLUSTER_SIZE=3 \ DISCOVERY_SERVICE=etcd:2379 \ - ROOT_PASSWORD_SECRET=mysql-root-password \ + ROOT_SECNAME=mysql-root-password \ TTL=10 \ - TZ=UTC \ - SST_AUTH_SECRET=sst-auth-password - -ARG MARIADB_MAJOR=10.4 -ARG MARIADB_VERSION=10.4.20 -ARG APT_KEY=F1656F24C74CD1D8 -ARG DEB_REL=buster + TZ=UTC ARG UID=212 ARG GID=212 COPY requirements/ /root/ - -RUN apt-get -yq update && apt-get install -yq gnupg && \ - apt-key adv --recv-keys --keyserver keyserver.ubuntu.com $APT_KEY && \ - echo "deb [arch=amd64] \ - http://nyc2.mirrors.digitalocean.com/mariadb/repo/$MARIADB_MAJOR/debian $DEB_REL main" \ - > /etc/apt/sources.list.d/mariadb.list && \ - groupadd -g $GID mysql && \ - useradd -u $UID -g $GID -s /bin/false -c "MariaDB" -d /none mysql && \ - apt-get -yq update && apt-get -yq install --no-install-recommends \ - curl iputils-ping jq mariadb-server=1:$MARIADB_VERSION+maria~$DEB_REL \ - mariadb-backup=1:$MARIADB_MAJOR_$MARIADB_VERSION+maria~$DEB_REL \ - mariadb-client=1:$MARIADB_MAJOR_$MARIADB_VERSION+maria~$DEB_REL \ - net-tools netcat procps && \ - 
apt-get clean && rm -fr /var/log/* /var/lib/mysql/* && \ - rm -fr /root/.cache /usr/share/zoneinfo/leap-seconds.list -RUN pip install -r /root/common.txt && \ +RUN groupmod -g $GID mysql && \ + usermod -u $UID -s /bin/false -c "MariaDB" -d /none mysql && \ + apt-get -yq update && apt-get -yq install --no-install-recommends \ + curl iputils-ping jq net-tools netcat-openbsd procps \ + python3 python3-pip python3-etcd3 && \ + apt-get clean && rm -fr /var/log/* /var/lib/mysql/* \ + /var/lib/apt/lists /var/cache/debconf/*old /root/.cache +RUN pip install --no-cache-dir -r /root/common.txt --break-system-packages && \ echo "dash dash/sh boolean false" | debconf-set-selections && \ dpkg-reconfigure dash || true EXPOSE 3306 4444 4567/udp 4567 4568 VOLUME /var/lib/mysql - HEALTHCHECK --interval=10s --timeout=3s --retries=30 \ CMD /bin/sh /usr/local/bin/healthcheck.sh || exit 1 diff --git a/images/mariadb-galera/Dockerfile.alpine b/images/mariadb-galera/Dockerfile.alpine index c31f75d7..a62fdcee 100644 --- a/images/mariadb-galera/Dockerfile.alpine +++ b/images/mariadb-galera/Dockerfile.alpine @@ -1,8 +1,12 @@ -FROM python:3.7.0-alpine3.8 -MAINTAINER Rich Braun "docker@instantlinux.net" +# Experimental - this "almost" works but SST transfers fail, apparently +# due to issues with the mariadb-backup script distributed with alpine; +# abandoned this in favor of the image distributed by MariaDB maintainers + +FROM python:3.14.0-alpine3.22 ARG BUILD_DATE ARG VCS_REF -LABEL org.label-schema.build-date=$BUILD_DATE \ +LABEL org.opencontainers.image.authors="Rich Braun docker@instantlinux.net" \ + org.label-schema.build-date=$BUILD_DATE \ org.label-schema.license=GPL-2.0 \ org.label-schema.name=mariadb-galera \ org.label-schema.vcs-ref=$VCS_REF \ @@ -11,27 +15,25 @@ LABEL org.label-schema.build-date=$BUILD_DATE \ ENV CLUSTER_NAME=cluster01 \ CLUSTER_SIZE=3 \ DISCOVERY_SERVICE=etcd:2379 \ - ROOT_PASSWORD_SECRET=mysql-root-password \ + PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python \ +
ROOT_SECNAME=mysql-root-password \ TTL=10 \ TZ=UTC \ - SST_PASSWORD= \ - SST_SECRET=sst-auth-password + SST_SECNAME=sst-auth-password -ARG MARIADB_MAJOR=10.3 -ARG MARIADB_VERSION=10.3.9-r2 +ARG MARIADB_MAJOR=11.4 +ARG MARIADB_VERSION=11.4.8-r0 ARG UID=212 ARG GID=212 COPY requirements/ /root/ -RUN echo '@edge http://dl-cdn.alpinelinux.org/alpine/edge/main' \ - >>/etc/apk/repositories && \ - addgroup -g $GID mysql && \ +RUN addgroup -g $GID mysql && \ adduser -u $UID -G mysql -s /bin/false -g "MariaDB" -h /none -D mysql && \ apk add --update --no-cache \ - curl jq mariadb@edge=$MARIADB_VERSION \ - mariadb-backup@edge=$MARIADB_VERSION \ - mariadb-client@edge=$MARIADB_VERSION net-tools socat && \ + bash curl galera jq mariadb=$MARIADB_VERSION \ + mariadb-backup=$MARIADB_VERSION \ + mariadb-client=$MARIADB_VERSION net-tools pv socat && \ pip install -r /root/common.txt && \ ln -s /usr/bin/mysqld /usr/sbin && \ rm -fr /var/log/* /var/lib/mysql/* @@ -44,6 +46,7 @@ HEALTHCHECK --interval=10s --timeout=3s --retries=30 \ COPY wsrep.cnf my.cnf /etc/ COPY src/entrypoint.py src/healthcheck.sh /usr/local/bin/ +COPY wsrep_sst_mariabackup /usr/bin/ ENTRYPOINT ["/usr/local/bin/entrypoint.py"] # TODO: fix healthcheck.sh to handle long-duration bootstrap diff --git a/images/mariadb-galera/Makefile b/images/mariadb-galera/Makefile index ad38e418..5fd9089a 100644 --- a/images/mariadb-galera/Makefile +++ b/images/mariadb-galera/Makefile @@ -27,7 +27,7 @@ test_requirements: python_env $(VDIR)/bin/python: @echo "Creating virtual environment" - virtualenv --system-site-packages $(VDIR) + python3 -m venv --system-site-packages $(VDIR) pytest: test_requirements @echo "Running pytest unit tests" diff --git a/images/mariadb-galera/README.md b/images/mariadb-galera/README.md index 6750eed7..8301b87e 100644 --- a/images/mariadb-galera/README.md +++ b/images/mariadb-galera/README.md @@ -1,11 +1,11 @@ ## mariadb-galera 
[![](https://img.shields.io/docker/v/instantlinux/mariadb-galera?sort=date)](https://hub.docker.com/r/instantlinux/mariadb-galera/tags "Version badge") [![](https://img.shields.io/docker/image-size/instantlinux/mariadb-galera?sort=date)](https://github.com/instantlinux/docker-tools/tree/main/images/mariadb-galera "Image badge") [![](https://img.shields.io/badge/dockerfile-latest-blue)](https://gitlab.com/instantlinux/docker-tools/-/blob/main/images/mariadb-galera/Dockerfile "dockerfile") -MariaDB 10.4 with automatic cluster generation under kubernetes / swarm using named volumes for data persistence. This has robust bootstrap logic based on MariaDB / Galera documentation for automated cluster create / join operations. +MariaDB 12.x with automatic cluster generation under kubernetes / swarm using named volumes for data persistence. This has robust bootstrap logic based on MariaDB / Galera documentation for automated cluster create / join operations. Requires an etcd instance for sharing instance-health data across the cluster. ### Usage - kubernetes -Define the following dependencies before launching the cluster: passwords for root and SST, network load balancer, and a dedicated etcd key-value store. Here's how: +Define the following dependencies before launching the cluster: password for root, network load balancer, and a dedicated etcd key-value store. Here's how: Create a random root password: ``` @@ -25,7 +25,7 @@ EOT sekret enc /dev/shm/new.yaml >secrets/$SECRET rm /dev/shm/new.yaml ``` -You can use a tool like [sops](https://github.com/mozilla/sops) or [sekret](https://github.com/nownabe/sekret) to generate the secrets file. Do the same for an sst-auth-password. +You can use a tool like [sops](https://github.com/mozilla/sops) or [sekret](https://github.com/nownabe/sekret) to generate the secrets file. Set any local my.cnf values in files under a volume mount for /etc/mysql/my.cnf.d (mapped as $ADMIN_PATH/mariadb/etc/). 
Use @@ -90,9 +90,19 @@ cd docker-tools/k8s make db00 ~~~ +### Restarting + +When taking the database down, wait for all pods to stop, and then clear etcd entries for the cluster: +``` +CLUSTER=db00 +ETCD_HOST=10.101.1.19 +etcdctl --endpoints=$ETCD_HOST:2379 del --prefix /galera/$CLUSTER +``` +Then launch with the helm chart or docker-compose. + ### Usage - swarm -This was originally developed under docker Swarm. A [docker-compose](https://github.com/instantlinux/docker-tools/blob/main/images/mariadb-galera/docker.compose) file is a legacy of that original work. Before stack-deploying it, invoke _docker secret create_ to generate the two secrets _mysql-root-password_ and _sst-auth-password-, and define an ADMIN_PATH environment variable pointing to your my.cnf (it has to be in the same location on each docker node). +This was originally developed under docker Swarm. A [docker-compose](https://github.com/instantlinux/docker-tools/blob/main/images/mariadb-galera/docker.compose) file is a legacy of that original work. Before stack-deploying it, invoke _docker secret create_ to generate the secret _mysql-root-password_, and define an ADMIN_PATH environment variable pointing to your my.cnf (it has to be in the same location on each docker node). ### Variables @@ -102,11 +112,11 @@ This was originally developed under docker Swarm. A [docker-compose](https://git | CLUSTER_NAME | cluster01 | cluster name | | CLUSTER_SIZE | 3 | expected number of nodes | | DISCOVERY_SERVICE | etcd:2379 | etcd host list, e.g. 
etcd1:2379,etcd2:2379 | +| LOG_LEVEL | info | set to debug for additional logging | | REINSTALL_OK | | set to any value to enable reinstall over old volume | -| ROOT_PASSWORD_SECRET | mysql-root-password | name of secret for password | +| ROOT_SECNAME | mysql-root-password | name of secret for password | | TTL | 10 | longevity (in seconds) of keys posted to etcd | | TZ | UTC | timezone | -| SST_AUTH_SECRET | sst-auth-password | name of secret for password | ### Notes @@ -129,6 +139,8 @@ configuration. It requires a stable etcd configuration for node discovery and master election at restart. A single instance can be invoked without HA resources using kubernetes-single.yaml. +There is no supported etcd3 library for python3 (as of Oct 2025). For now, this is using python-etcd3 0.12.0, last updated in 2020, with PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION set for compatibility. + ### Credits Thanks to ashraf-s9s of severalnines for the healthcheck script. diff --git a/images/mariadb-galera/helm/Chart.yaml b/images/mariadb-galera/helm/Chart.yaml index cf8e1879..ef8df6f6 100644 --- a/images/mariadb-galera/helm/Chart.yaml +++ b/images/mariadb-galera/helm/Chart.yaml @@ -7,8 +7,8 @@ sources: - https://github.com/MariaDB/server - https://github.com/MariaDB/galera type: application -version: 0.1.0 -appVersion: "10.4.20" +version: 0.1.1 +appVersion: "12.0.2" dependencies: - name: chartlib version: 0.1.8 diff --git a/images/mariadb-galera/helm/templates/configmap.yaml b/images/mariadb-galera/helm/templates/configmap.yaml index e7ed287e..ee91dfe6 100644 --- a/images/mariadb-galera/helm/templates/configmap.yaml +++ b/images/mariadb-galera/helm/templates/configmap.yaml @@ -57,7 +57,6 @@ data: # * InnoDB # innodb_data_file_path = ibdata1:10M:autoextend - innodb_buffer_pool_instances = {{ .Values.innodb_buffer_pool_instances }} innodb_buffer_pool_size = {{ .Values.innodb_buffer_pool_size }} innodb_log_file_size = {{ .Values.innodb_log_file_size }} diff --git 
a/images/mariadb-galera/helm/values.yaml b/images/mariadb-galera/helm/values.yaml index 4aaa7fa5..154b4ffd 100644 --- a/images/mariadb-galera/helm/values.yaml +++ b/images/mariadb-galera/helm/values.yaml @@ -3,7 +3,6 @@ ipReadOnly: 10.101.1.101 nodePort: 30306 character_set_server: utf8 ft_min_word_len: 3 -innodb_buffer_pool_instances: 1 innodb_buffer_pool_size: 1024M innodb_log_file_size: 32M interactive_timeout: 28800 @@ -26,8 +25,9 @@ statefulset: containerPorts: [ containerPort: 3306 ] env: cluster_name: mariadb - discovery_service: "10.101.1.19:2379" cluster_size: 3 + discovery_service: "10.101.1.19:2379" + log_level: info replicas: 3 resources: limits: @@ -48,9 +48,6 @@ volumeMounts: - name: mysql-root-password mountPath: /run/secrets/mysql-root-password subPath: mysql-root-password -- name: sst-auth-password - mountPath: /run/secrets/sst-auth-password - subPath: sst-auth-password volumes: - name: etc configMap: @@ -58,9 +55,6 @@ volumes: - name: mysql-root-password secret: secretName: mysql-root-password -- name: sst-auth-password - secret: - secretName: sst-auth-password volumeClaimTemplates: - metadata: name: data diff --git a/images/mariadb-galera/hooks/add_tags b/images/mariadb-galera/hooks/add_tags index 303d1afe..524df427 100755 --- a/images/mariadb-galera/hooks/add_tags +++ b/images/mariadb-galera/hooks/add_tags @@ -1,3 +1,3 @@ #!/bin/sh -TAG=$(grep "ARG .*_VERSION" Dockerfile | cut -d= -f 2) +TAG=$(grep "FROM mariadb:" Dockerfile | cut -d: -f 2) echo "--tag $DOCKER_REPO:$TAG" diff --git a/images/mariadb-galera/my.cnf b/images/mariadb-galera/my.cnf index 7ee859b7..659c1b60 100644 --- a/images/mariadb-galera/my.cnf +++ b/images/mariadb-galera/my.cnf @@ -10,6 +10,7 @@ socket = /dev/shm/mysqld.sock default_storage_engine = InnoDB query_cache_size = 0 query_cache_type = 0 +slave_connections_needed_for_purge = 0 innodb_flush_log_at_trx_commit = 0 innodb_flush_method = O_DIRECT diff --git a/images/mariadb-galera/requirements/common.txt 
b/images/mariadb-galera/requirements/common.txt index eb6c8181..8704d54a 100644 --- a/images/mariadb-galera/requirements/common.txt +++ b/images/mariadb-galera/requirements/common.txt @@ -1 +1 @@ -python-etcd==0.4.5 +etcd3==0.12.0 diff --git a/images/mariadb-galera/requirements/test.txt b/images/mariadb-galera/requirements/test.txt index 634cd58b..4d76ef12 100644 --- a/images/mariadb-galera/requirements/test.txt +++ b/images/mariadb-galera/requirements/test.txt @@ -1,5 +1,5 @@ -coverage==5.4 -flake8==3.9.0 -mock==4.0.3 -pytest==6.2.2 -pytest-cov==2.11.1 +coverage==7.11.0 +flake8==7.3.0 +mock==5.2.0 +pytest==8.4.2 +pytest-cov==7.0.0 diff --git a/images/mariadb-galera/src/entrypoint.py b/images/mariadb-galera/src/entrypoint.py index 582a2c04..8ebce732 100755 --- a/images/mariadb-galera/src/entrypoint.py +++ b/images/mariadb-galera/src/entrypoint.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # MariaDB cluster startup # This script upon startup waits for a specified number of nodes @@ -22,7 +22,7 @@ import sys import time -import etcd +import etcd3 class Constants(object): @@ -31,6 +31,9 @@ class Constants(object): ETCD_PREFIX = '/galera' LOG_DIR = '/var/log/mysql' + ETCD_RETRIES = 2 + ETCD_RETRY_WAIT = 5 + KEY_CLUSTER_UPDATE_TIMER = 'update_timer' KEY_HEALTH = 'health' KEY_HOSTNAME = 'hostname' @@ -82,10 +85,9 @@ def __init__(self): self.my_hostname = socket.gethostname() self.my_ipv4 = socket.gethostbyname(self.my_hostname) self.data_dir = self._invoke( - 'mysqld --verbose --help --wsrep-cluster-address=none ' + 'mariadbd --verbose --help --wsrep-cluster-address=none ' '| grep ^datadir').split()[1].strip() self.root_password = self._get_root_password() - self.sst_password = self._get_sst_password() self.prev_address = None def share_initial_state(self, discovery): @@ -108,7 +110,7 @@ def share_initial_state(self, discovery): ttl=self.ttl_stack_up) try: discovery.set_key(Constants.KEY_SAFE_TO_BOOTSTRAP, - self._is_safe_to_boot(), + 
str(self._is_safe_to_boot()), ttl=self.ttl_stack_up) except AssertionError: pass @@ -155,12 +157,10 @@ def wait_checkin(self, retry_interval=5): def start_database(self, cluster_address='', wsrep_new_cluster=False, cmdarg=None): command = ( - 'exec /usr/sbin/mysqld --wsrep_cluster_name=%(cluster_name)s ' - '--wsrep-cluster-address="gcomm://%(address)s" ' - '--wsrep_sst_auth="sst:%(sst_password)s"' % { + 'exec /usr/sbin/mariadbd --wsrep_cluster_name=%(cluster_name)s ' + '--wsrep-cluster-address="gcomm://%(address)s"' % { 'cluster_name': self.name, - 'address': cluster_address, - 'sst_password': self.sst_password}) + 'address': cluster_address}) if wsrep_new_cluster: command += ' --wsrep-new-cluster' if cmdarg: @@ -172,10 +172,11 @@ def start_database(self, cluster_address='', wsrep_new_cluster=False, 'Network connectivity problem for %s' % cluster_address) while True: # skew startup of concurrent launches by self.ttl_lock seconds - try: - self.discovery.acquire_lock('bootstrap', ttl=self.ttl_lock) - except etcd.EtcdLockExpired: - pass + ret = self.discovery.acquire_lock('bootstrap', ttl=self.ttl_lock) + if not ret: + logging.info({'action': 'acquire_lock', + 'lock_name': 'bootstrap', + 'message': 'ttl expired'}) if Constants.STATUS_DONOR in self._cluster_health(): # perform only one SST join at a time, loop until others done time.sleep(5) @@ -245,6 +246,9 @@ def restart_database(self, node_list): recovered_position = int(val) addr_highest_pos = ipv4 + logging.debug({'action': 'restart', + 'safe_to_bootstrap': safe_to_bootstrap, + 'recoverable_nodes': recoverable_nodes}) if safe_to_bootstrap == 1: # Cluster was shut down normally logging.info({'action': 'restart_database', @@ -277,7 +281,7 @@ def report_status(self): def _set_wsrep_key(key): val = self._invoke( - 'mysql -u root -p%(pw)s -Bse ' + 'mariadb -u root -p%(pw)s -Bse ' '"SHOW STATUS LIKE \'%(key)s\';"' % { 'pw': self.root_password, 'key': key}).split()[1] self.discovery.set_key(key, val, 
ttl=self.discovery.ttl) @@ -287,7 +291,7 @@ def _set_wsrep_key(key): try: self.discovery.set_key(Constants.KEY_HOSTNAME, self.my_hostname) - except etcd.EtcdException as ex: + except etcd3.Etcd3Exception as ex: logging.warn(dict(log_info, **{'message': str(ex)})) try: status = _set_wsrep_key(Constants.KEY_WSREP_LOCAL_STATE_COMMENT) @@ -304,7 +308,7 @@ def _set_wsrep_key(key): Constants.STATUS_DEGRADED) except IndexError: pass - except etcd.EtcdException as ex: + except etcd3.Etcd3Exception as ex: logging.warn(dict(log_info, **{'message': str(ex)})) def _get_root_password(self): @@ -318,7 +322,7 @@ def _get_root_password(self): return os.environ['MYSQL_ROOT_PASSWORD'] try: with open(os.path.join('/run/secrets', - os.environ['ROOT_PASSWORD_SECRET']), + os.environ['ROOT_SECNAME']), 'r') as f: pw = f.read() return pw @@ -329,18 +333,6 @@ def _get_root_password(self): else: raise AssertionError('Root password must be specified') - def _get_sst_password(self): - if 'SST_PASSWORD' in os.environ: - return os.environ['SST_PASSWORD'] - try: - with open(os.path.join('/run/secrets', - os.environ['SST_AUTH_SECRET']), 'r') as f: - pw = f.read() - return pw - except IOError: - pass - return '' - def _is_new_install(self): return (not os.path.exists(os.path.join(self.data_dir, 'ibdata1')) and not os.path.exists(os.path.join(self.data_dir, 'mysql'))) @@ -381,15 +373,16 @@ def _get_recovered_position(self): returns: int raises: AssertionError if not found """ - uuid_pat = re.compile('[a-z0-9]*-[a-z0-9]*:-*[0-9]', re.I) + uuid_pat = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-" + "[0-9a-f]{4}-[0-9a-f]{12}:-?[0-9]+") filename = os.path.join(self.data_dir, '%s.err' % self.my_hostname) - self._invoke('mysqld_safe --wsrep-cluster-address=gcomm:// ' + self._invoke('mariadbd-safe --wsrep-cluster-address=gcomm:// ' '--wsrep-recover --skip-syslog', ignore_errors=False) with open(filename, 'r') as f: for line in f: - match = re.match(uuid_pat, line) + match = re.search(uuid_pat, line) if
match: - return int(match.split(':')[1]) + return int(match.group(0).split(':')[1]) os.unlink(filename) raise AssertionError('No recovery position identified') @@ -409,28 +402,24 @@ def _get_gcomm_uuid(self): return None def _install_new_database(self, timeout=30): - """run the mysql_install_db installer and set up system users""" + """run the mariadb-install-db installer and set up system users""" script_setusers = r""" SET @@SESSION.SQL_LOG_BIN=0; - DELETE FROM mysql.user WHERE user='root' AND host!='localhost'; DELETE FROM mysql.user WHERE user=''; - UPDATE mysql.user set host='%%' where user='root' and host='localhost'; - CREATE USER 'sst'@'localhost' IDENTIFIED BY '%(sst_password)s'; - GRANT RELOAD,PROCESS,LOCK TABLES,REPLICATION CLIENT ON *.* TO - 'sst'@'localhost'; + RENAME USER 'root'@'localhost' TO 'root'@'%'; DROP DATABASE IF EXISTS test; FLUSH PRIVILEGES; """ logging.info({'action': '_install_new_database', 'status': 'start'}) opts = '--user=mysql --datadir=%s --wsrep_on=OFF' % self.data_dir - mysql_client = '/usr/bin/mysql --protocol=socket -u root' - sys.stdout.write(self._invoke('mysql_install_db %s --rpm' % + mysql_client = '/usr/bin/mariadb --protocol=socket -u root' + sys.stdout.write(self._invoke('mariadb-install-db %s --rpm' % opts + ' --no-defaults')) start_time = time.time() proc = self._run_background( - 'exec /usr/sbin/mysqld %s --skip-networking' % opts) + 'exec /usr/sbin/mariadbd %s --skip-networking' % opts) while time.time() - start_time < timeout: time.sleep(1) if self._invoke('%s -e "SELECT 1;"' % mysql_client @@ -444,10 +433,10 @@ def _install_new_database(self, timeout=30): exit(1) logging.info({'action': '_install_new_database', 'step': '0'}) sys.stdout.write(self._invoke( - 'mysqladmin password "%s"' % self.root_password, + 'mariadb-admin password "%s"' % self.root_password, ignore_errors=False, suppress_log=True)) sys.stdout.write(self._invoke( - 'mysql_tzinfo_to_sql /usr/share/zoneinfo | ' + 'mariadb-tzinfo-to-sql 
/usr/share/zoneinfo | ' 'sed "s/Local time zone must be set--see zic manual page/FCTY/" | ' '%s mysql -u root -p%s' % (mysql_client, self.root_password), ignore_errors=False)) @@ -456,9 +445,8 @@ def _install_new_database(self, timeout=30): '%(mysql)s -u root -p%(mysql_root_password)s -e "%(script)s"' % { 'mysql': mysql_client, 'mysql_root_password': self.root_password, - 'script': script_setusers % { - 'sst_password': self.sst_password - }}, ignore_errors=False, suppress_log=False)) + 'script': script_setusers}, + ignore_errors=False, suppress_log=False)) logging.info({'action': '_install_new_database', 'step': '2'}) time.sleep(60) proc.terminate() @@ -507,7 +495,7 @@ def _update_cluster_address(self): else: self.update_timer_active = False self._invoke( - 'mysql -u root -p%(pw)s -e ' + 'mariadb -u root -p%(pw)s -e ' '"SET GLOBAL wsrep_cluster_address=\'%(address)s\'";' % {'pw': self.root_password, 'address': address}) self.prev_address = address @@ -559,33 +547,41 @@ class DiscoveryService(object): def __init__(self, nodes, cluster): self.ipv4 = socket.gethostbyname(socket.gethostname()) - self.etcd = etcd.Client(host=nodes, allow_reconnect=True, - lock_prefix='/%s/_locks' % cluster) + host, port = nodes[0] + try: + self.etcd = etcd3.client(host=host, port=port, + timeout=Constants.ETCD_RETRY_WAIT) + self.etcd.status() + except Exception as exc: + print(f"Etcd client failed: {exc}") + raise self.prefix = Constants.ETCD_PREFIX + '/' + cluster + self.lock_prefix = '/%s/_locks' % cluster try: self.ttl = int(os.environ['TTL']) except KeyError: self.ttl = Constants.DEFAULT_TTL self.ttl_dir = Constants.TTL_DIR self.locks = {} + self.cluster = cluster + logging.info({'action': 'etcd3_init', 'host': host, 'port': port, + 'prefix': self.prefix, 'cluster': cluster}) def __del__(self): self.delete_key(self.ipv4) def set_key(self, keyname, value, my_host=True, ttl=None): - """set a key under /galera//""" + """set a key under /galera//, with a + lease duration as specified by 
ttl""" + ttl = ttl if ttl else Constants.DEFAULT_TTL logging.debug({'action': 'set_key', 'keyname': keyname, - 'value': value}) + 'value': value, 'ttl': ttl}) key_path = self.prefix + '/' + self.ipv4 if my_host else self.prefix - try: - self.etcd.write(key_path, None, dir=True, ttl=self.ttl_dir) - except etcd.EtcdNotFile: - pass - self.etcd.write('%(key_path)s/%(keyname)s' % { - 'key_path': key_path, 'keyname': keyname - }, - value, ttl=ttl if ttl else Constants.DEFAULT_TTL) + self.etcd.put(key_path, '', lease=self.etcd.lease(ttl)) + self.etcd.put('%(key_path)s/%(keyname)s' % + {'key_path': key_path, 'keyname': keyname}, + str(value), lease=self.etcd.lease(ttl)) def get_key(self, keyname, ipv4=None): """Fetch the key for a given ipv4 node @@ -596,58 +592,63 @@ def get_key(self, keyname, ipv4=None): log_info = {'action': 'get_key', 'keyname': keyname, 'ipv4': ipv4} key_path = self.prefix + '/' + ipv4 if ipv4 else self.prefix key_path += '/' + keyname if keyname else '' - try: - item = self.etcd.read(key_path, timeout=10) - except (etcd.EtcdKeyNotFound, etcd.EtcdNotDir): - logging.debug(dict(log_info, **{ - 'status': 'error', - 'message': 'not_found'})) - return None - - log_info['status'] = 'ok' - if item.dir: - retval = [child.key[len(key_path) + 1:] - for child in item.children] - return retval - else: - logging.debug(dict(log_info, **{'value': item.value})) - return item.value + item = self.etcd.get(key_path) + if item[1]: + log_info['status'] = 'ok' + logging.debug(dict(log_info, **{'value': item[0].decode("utf-8")})) + return item[0].decode("utf-8") + children = self.etcd.get_prefix(key_path, keys_only=True) + if children: + log_info['status'] = 'ok' + retval = [child[1].key.decode("utf-8").removeprefix( + self.prefix + '/') for child in children] + if len(retval) > 0: + logging.debug(dict(log_info, **{'values': retval})) + return retval + + logging.debug(dict(log_info, **{ + 'status': 'error', + 'message': 'not_found'})) + return None def delete_key(self, 
keyname, ipv4=None): log_info = {'action': 'delete_key', 'keyname': keyname, 'ipv4': ipv4} key_path = self.prefix + '/' + ipv4 if ipv4 else self.prefix key_path += '/' + keyname if keyname else '' - try: - self.etcd.delete(key_path, recursive=True) + ret = self.etcd.delete_prefix(key_path) + if ret: logging.debug(dict(log_info, **{'status': 'ok'})) - except etcd.EtcdKeyNotFound: + else: logging.debug(dict(log_info, **{ 'status': 'error', 'message': 'not_found'})) - def get_key_recursive(self, keyname, ipv4=None, nest_level=0): + def get_key_recursive(self, keyname, ipv4=None): """Fetch all keys under the given node """ - assert nest_level < 10, 'Recursion too deep' - retval = self.get_key(keyname, ipv4=ipv4) - if type(retval) is list: - return {key: self.get_key_recursive(key, ipv4=ipv4, - nest_level=nest_level + 1) - for key in retval} - else: - return retval + retval = {meta.key.decode('utf-8').removeprefix( + self.prefix + '/' + (ipv4 + '/' if ipv4 else '')): + val.decode('utf-8') + for val, meta in self.etcd.get_prefix( + self.prefix + '/' + ipv4 + + ('/' + keyname if keyname else ''))} + logging.debug({'action': 'get_key_recursive', + 'keyname': keyname, 'ipv4': ipv4, + 'retval': retval}) + return retval def acquire_lock(self, lock_name, ttl=Constants.DEFAULT_TTL): """acquire cluster lock - used upon electing leader""" logging.info({'action': 'acquire_lock', 'lock_name': lock_name, 'ttl': ttl}) - self.locks[lock_name] = etcd.Lock(self.etcd, lock_name) - # TODO: make this an atomic mutex with etcd3 (currently using etcd2) + self.locks[lock_name] = self.etcd.lock(self.lock_prefix + lock_name, + ttl=ttl) + # TODO: make this an atomic mutex with etcd3 while self.get_key('lock-%s' % lock_name): time.sleep(0.25) self.set_key('lock-%s' % lock_name, self.ipv4, my_host=False, ttl=ttl) - self.locks[lock_name].acquire(lock_ttl=2) + return self.locks[lock_name].acquire(timeout=2) def release_lock(self, lock_name): """release cluster lock""" @@ -682,7 +683,15 @@ def
setup_logging(level=logging.INFO, output=sys.stdout): def main(): - setup_logging() + level = logging.INFO + if 'LOG_LEVEL' in os.environ: + if os.environ['LOG_LEVEL'].lower() == 'debug': + level = logging.DEBUG + elif os.environ['LOG_LEVEL'].lower() != 'info': + logging.error({'action': 'main', + 'level': os.environ['LOG_LEVEL'], + 'message': 'invalid log_level'}) + setup_logging(level=level) cluster = MariaDBCluster() logging.info({'action': 'main', 'status': 'start', 'my_ipv4': cluster.my_ipv4}) diff --git a/images/mariadb-galera/src/healthcheck.sh b/images/mariadb-galera/src/healthcheck.sh index 9bfd78ed..a2b2df66 100755 --- a/images/mariadb-galera/src/healthcheck.sh +++ b/images/mariadb-galera/src/healthcheck.sh @@ -15,7 +15,7 @@ MYSQL_OPTS="-N -q -A --connect-timeout=10" TMP_FILE="/dev/shm/mysqlchk.$$.out" ERR_FILE="/dev/shm/mysqlchk.$$.err" FORCE_FAIL="/dev/shm/proxyoff" -MYSQL_BIN='/usr/bin/mysql' +MYSQL_BIN='/usr/bin/mariadb' CHECK_QUERY="show global status where variable_name='wsrep_local_state'" CHECK_QUERY2="show global variables where variable_name='wsrep_sst_method'" CHECK_QUERY3="show global variables where variable_name='read_only'"