refactor playbook for improved clarity and structure

This commit is contained in:
vvzvlad 2025-07-27 20:35:01 +03:00
parent e7cca8d6c0
commit 3d07dc47ed
3 changed files with 309 additions and 0 deletions

8
rotate/grist.json Normal file
View File

@ -0,0 +1,8 @@
{
"grist_server": "https://grist.vvzvlad.xyz",
"grist_doc_id": "inwaCJSGxZA1u64QJmQBEi",
"grist_api_key": "6bbcce2a64e7c865fbb2e5ec4480f0e1328f317f"
}

11
rotate/requirements.txt Normal file
View File

@ -0,0 +1,11 @@
ansible-output-parser==0.1.0
certifi==2024.7.4
charset-normalizer==3.3.2
colorama==0.4.6
future==1.0.0
grist-api==0.1.0
idna==3.8
proxmoxer==2.1.0
PyYAML==6.0.2
requests==2.32.3
urllib3==2.2.2

290
rotate/rotate.py Normal file
View File

@ -0,0 +1,290 @@
# flake8: noqa
# pylint: disable=broad-exception-raised, raise-missing-from, too-many-arguments, redefined-outer-name
# pylance: disable=reportMissingImports, reportMissingModuleSource, reportGeneralTypeIssues
# type: ignore
import json
import logging
import os
import random
import re
import shlex
import socket
import subprocess
import sys
import time
from datetime import datetime, timedelta, timezone

import colorama
from grist_api import GristDocAPI
class GRIST:
    """Convenience wrapper around ``GristDocAPI`` for a single Grist document.

    Table and column names may be given with spaces; they are converted to
    the underscore form before being passed to the API client.
    """

    def __init__(self, server, doc_id, api_key, logger):
        """Store the connection settings and create the ``GristDocAPI`` client."""
        self.server = server
        self.doc_id = doc_id
        self.api_key = api_key
        self.logger = logger
        self.grist = GristDocAPI(doc_id, server=server, api_key=api_key)

    def table_name_convert(self, table_name):
        """Return ``table_name`` with spaces replaced by underscores."""
        return table_name.replace(" ", "_")

    def to_timestamp(self, dtime: datetime) -> int:
        """Convert ``dtime`` to whole epoch seconds.

        Naive datetimes are interpreted as UTC+3 before conversion.
        """
        if dtime.tzinfo is None:
            dtime = dtime.replace(tzinfo=timezone(timedelta(hours=3)))
        return int(dtime.timestamp())

    def insert_row(self, data, table):
        """Add one record built from ``data`` and return the API's result."""
        record = {key.replace(" ", "_"): value for key, value in data.items()}
        return self.grist.add_records(self.table_name_convert(table), [record])

    def update_column(self, row_id, column_name, value, table):
        """Set a single column of row ``row_id`` (datetimes become timestamps)."""
        self.update(row_id, {column_name: value}, table)

    def delete_row(self, row_id, table):
        """Delete the record with id ``row_id`` from ``table``."""
        self.grist.delete_records(self.table_name_convert(table), [row_id])

    def update(self, row_id, updates, table):
        """Apply the ``updates`` mapping (column -> value) to row ``row_id``.

        Datetime values are converted with ``to_timestamp``. The caller's
        mapping is left untouched; a converted copy is sent instead.
        """
        converted = {
            column_name.replace(" ", "_"): (
                self.to_timestamp(value) if isinstance(value, datetime) else value
            )
            for column_name, value in updates.items()
        }
        self.grist.update_records(
            self.table_name_convert(table), [{"id": row_id, **converted}]
        )

    def fetch_table(self, table):
        """Return all records of ``table`` as delivered by the API client."""
        return self.grist.fetch_table(self.table_name_convert(table))

    def find_record(self, id=None, state=None, name=None, table=None):
        """Return the list of records in ``table`` matching the given filters.

        ``id`` takes precedence over the other filters. ``state`` is compared
        with the ``State`` column and ``name`` with the ``Name`` column; when
        both are given a record must match both. With no filter at all an
        empty list is returned.

        Raises:
            ValueError: if ``table`` is not specified.
        """
        if table is None:
            raise ValueError("Table is not specified")
        table_data = self.grist.fetch_table(self.table_name_convert(table))
        if id is not None:
            return [row for row in table_data if row.id == id]
        if state is None and name is None:
            return []
        # Every name comparison uses the ``Name`` column, as the name-only
        # lookup always did; the combined branch used to read ``row.name``.
        return [
            row
            for row in table_data
            if (state is None or row.State == state)
            and (name is None or row.Name == name)
        ]

    def find_settings(self, key, table):
        """Return the ``Value`` of the record whose ``Setting`` equals ``key``.

        Raises:
            ValueError: if the setting is missing, or its value is None or "".
        """
        for record in self.fetch_table(table):
            if record.Setting == key:
                if record.Value is None or record.Value == "":
                    raise ValueError(f"Setting {key} blank")
                return record.Value
        raise ValueError(f"Setting {key} not found")
def run_docker_command(command, logger):
    """Run ``command`` through the shell and report whether it exited with 0.

    The command gets 30 seconds to finish. Success, captured stdout, failures,
    timeouts and unexpected exceptions are all written to ``logger``; nothing
    is raised to the caller.

    Returns:
        True when the command exits with status 0, otherwise False.
    """
    try:
        completed = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if completed.returncode != 0:
            logger.error(f"Docker command failed: {command}, error: {completed.stderr.strip()}")
            return False
        logger.info(f"Docker command successful: {command}")
        output = completed.stdout.strip()
        if output:
            logger.info(f"Docker output: {output}")
        return True
    except subprocess.TimeoutExpired:
        logger.error(f"Docker command timed out: {command}")
        return False
    except Exception as exc:
        logger.error(f"Docker command exception: {command}, error: {str(exc)}")
        return False
def stop_and_remove_container(container_name, logger):
    """Stop and then remove the docker container called ``container_name``.

    Both steps are best-effort: the result of each command is logged by
    ``run_docker_command`` and otherwise ignored, so calling this when no
    such container exists is harmless.
    """
    # The command line is executed through a shell, so the name is quoted to
    # keep unusual characters from being interpreted as shell syntax.
    quoted_name = shlex.quote(str(container_name))
    logger.info(f"Stopping container: {container_name}")
    run_docker_command(f"docker stop {quoted_name}", logger)
    logger.info(f"Removing container: {container_name}")
    run_docker_command(f"docker rm {quoted_name}", logger)
def check_container_status(container_name, logger):
    """Report whether a container named exactly ``container_name`` is up.

    Runs ``docker ps`` (running containers only) with a 10 second timeout.
    Docker's ``name`` filter matches substrings, so the listing is narrowed
    to the exact name here; otherwise a different container whose name merely
    contains ``container_name`` could make the check pass.

    Returns:
        True when the container is listed with a status containing "Up";
        False when it is absent, has another status, or the check itself
        fails (the reason is logged).
    """
    try:
        # An argument list (no shell) avoids quoting problems with the name
        # and with the Go-template braces in the format string.
        result = subprocess.run(
            [
                "docker", "ps",
                "--filter", f"name={container_name}",
                "--format", "{{.Names}}\t{{.Status}}",
            ],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode == 0:
            for line in result.stdout.splitlines():
                names, _, status = line.partition("\t")
                # A container can be listed under several comma-separated names.
                if container_name not in names.split(","):
                    continue
                status = status.strip()
                if "Up" in status:
                    return True
                logger.warning(f"Container {container_name} status: {status}")
                return False
        logger.warning(f"Container {container_name} not found in running containers")
        return False
    except Exception as e:
        logger.error(f"Failed to check container {container_name} status: {str(e)}")
        return False
def start_container(container_name, node_id, logger):
    """Start a detached nexus-cli container for ``node_id``.

    Args:
        container_name: name given to the new container.
        node_id: value passed to the CLI's ``--node-id`` option.
        logger: logger receiving progress and error messages.

    Returns:
        True when ``docker run`` exits successfully, otherwise False.
    """
    # node_id is read from an external table and the command line is run
    # through a shell, so both interpolated values are quoted. Plain
    # alphanumeric values come out of shlex.quote unchanged.
    docker_command = (
        "docker run -td --init "
        f"--name {shlex.quote(str(container_name))} "
        "nexusxyz/nexus-cli:latest start "
        f"--node-id {shlex.quote(str(node_id))}"
    )
    logger.info(f"Starting container with node-id: {node_id}")
    return run_docker_command(docker_command, logger)
def get_next_node(grist, logger):
    """Return the row of the "Nodes" table with the lowest ``Hours`` value.

    Rows whose ``NodeID`` equals the string "1" are excluded (presumably a
    placeholder entry - confirm against the table). Rows whose ``Hours``
    cannot be read as an integer are skipped with a warning, so one bad row
    no longer blocks selection of every other node. Among rows with equal
    hours the first one in table order wins.

    Returns:
        The selected row, or None when no usable row exists or the table
        could not be fetched (the reason is logged).
    """
    try:
        candidates = []
        for row in grist.fetch_table(table="Nodes"):
            if row.NodeID == "1":
                continue
            try:
                hours = int(row.Hours)
            except (TypeError, ValueError):
                logger.warning(f"Skipping node {row.NodeID}: invalid Hours value {row.Hours!r}")
                continue
            candidates.append((hours, row))
        if not candidates:
            logger.error("No available nodes found in table")
            return None
        # min() keeps the first of several equal keys, matching what a stable
        # sort followed by taking element 0 would select.
        _, selected_node = min(candidates, key=lambda item: item[0])
        logger.info(f"Selected node: ID={selected_node.NodeID}, Hours={selected_node.Hours}")
        return selected_node
    except Exception as e:
        logger.error(f"Failed to get next node: {str(e)}")
        return None
def _wait_with_health_checks(container_name, node_id, logger, wait_hours=5, interval_minutes=10):
    """Sleep for ``wait_hours``, checking the container every ``interval_minutes``.

    A container found not running is removed and started again. Returns False
    as soon as such a restart fails, True once the whole wait has elapsed.
    """
    total_minutes = wait_hours * 60
    logger.info(f"Waiting {wait_hours} hours ({total_minutes} minutes) for node {node_id}")
    for elapsed_minutes in range(0, total_minutes, interval_minutes):
        if elapsed_minutes > 0:  # Skip first iteration log
            remaining_hours, remaining_mins = divmod(total_minutes - elapsed_minutes, 60)
            logger.info(f"Node {node_id}: {remaining_hours}h {remaining_mins}m remaining")
        # Check container status before sleeping
        if not check_container_status(container_name, logger):
            logger.error(f"Container {container_name} is not running, attempting restart")
            stop_and_remove_container(container_name, logger)
            if not start_container(container_name, node_id, logger):
                logger.error(f"Failed to restart container for node {node_id}")
                return False
            logger.info(f"Container restarted successfully for node {node_id}")
        time.sleep(interval_minutes * 60)
    return True


def main_rotation_cycle():
    """Run the endless nexus node rotation loop.

    Reads the Grist connection settings from ``grist.json`` in the current
    working directory, then repeats forever:

    1. pick the node with the fewest hours (``get_next_node``);
    2. add 1 to its ``Hours`` and start the "nexus" container for it (the
       increment is reverted if the container fails to start);
    3. keep it running for 5 hours with a health check every 10 minutes;
    4. stop the container and add the remaining 4 hours.

    Any error inside a cycle is logged, the container is removed and the loop
    retries after 60 seconds. Ctrl-C at any point, including during that
    back-off sleep, removes the container and ends the loop. Returns None.
    """
    colorama.init(autoreset=True)
    logger = logging.getLogger("NexusRotator")
    logger.setLevel(logging.INFO)
    # Attach the stream handler only once so that calling this function again
    # in the same process does not duplicate every log line.
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        logger.addHandler(handler)

    # Load grist configuration
    # NOTE(review): grist.json carries the API key in plain text; keep the
    # real file out of version control.
    try:
        with open('grist.json', 'r', encoding='utf-8') as f:
            grist_data = json.load(f)
    except Exception as e:
        logger.error(f"Failed to load grist.json: {str(e)}")
        return

    # Initialize Grist connection
    try:
        # Without a server and document id no request can succeed; fail here
        # instead of retrying the node lookup every minute forever.
        missing = [key for key in ("grist_server", "grist_doc_id") if not grist_data.get(key)]
        if missing:
            raise ValueError(f"grist.json is missing: {', '.join(missing)}")
        grist = GRIST(
            grist_data.get('grist_server'),
            grist_data.get('grist_doc_id'),
            grist_data.get('grist_api_key'),
            logger,
        )
        logger.info("Connected to Grist successfully")
    except Exception as e:
        logger.error(f"Failed to connect to Grist: {str(e)}")
        return

    container_name = "nexus"
    wait_hours = 5
    interval_minutes = 10
    cycle_count = 0
    logger.info("Starting nexus rotation cycle")
    # The outer try catches Ctrl-C wherever it arrives, including the
    # back-off sleep inside the error handler below.
    try:
        while True:
            cycle_count += 1
            logger.info(f"=== Starting cycle #{cycle_count} ===")
            try:
                # Get next node
                node = get_next_node(grist, logger)
                if not node:
                    logger.error("No node available, waiting 60 seconds before retry")
                    time.sleep(60)
                    continue
                node_id = node.NodeID
                current_hours = int(node.Hours)

                # Update hours (+1 before starting container)
                new_hours = current_hours + 1
                grist.update(node.id, {"Hours": new_hours}, "Nodes")
                logger.info(f"Updated node {node_id} hours: {current_hours} -> {new_hours}")

                # Remove any existing container with same name
                stop_and_remove_container(container_name, logger)

                # Start new container
                if not start_container(container_name, node_id, logger):
                    logger.error(f"Failed to start container for node {node_id}")
                    # Return the hour back since container didn't start
                    grist.update(node.id, {"Hours": current_hours}, "Nodes")
                    logger.info(f"Reverted node {node_id} hours back to: {current_hours}")
                    time.sleep(60)
                    continue
                logger.info(f"Container started successfully for node {node_id}")

                # Wait with progress updates and periodic health checks
                if not _wait_with_health_checks(
                    container_name, node_id, logger,
                    wait_hours=wait_hours, interval_minutes=interval_minutes,
                ):
                    # The +1 hour recorded at start stays in place here.
                    logger.error(f"Container failed during cycle for node {node_id}, moving to next node")
                    continue

                # Stop and remove container
                logger.info(f"{wait_hours} hours completed for node {node_id}, stopping container")
                stop_and_remove_container(container_name, logger)

                # Update hours (the rest of the run; 1 hour was added up front)
                final_hours = new_hours + (wait_hours - 1)
                grist.update(node.id, {"Hours": final_hours}, "Nodes")
                logger.info(f"Updated node {node_id} final hours: {new_hours} -> {final_hours}")
                logger.info(f"=== Cycle #{cycle_count} completed for node {node_id} ===")
            except Exception as e:
                logger.error(f"Cycle #{cycle_count} failed with error: {str(e)}")
                stop_and_remove_container(container_name, logger)
                logger.info("Waiting 60 seconds before next attempt")
                time.sleep(60)
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt, stopping rotation")
        stop_and_remove_container(container_name, logger)
    logger.info("Nexus rotation cycle stopped")
# Start the rotation loop only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_rotation_cycle()