refactor main_rotation_cycle to streamline node processing and container management
This commit is contained in:
146
rotate/rotate.py
146
rotate/rotate.py
@ -281,97 +281,83 @@ def main_rotation_cycle():
|
||||
|
||||
logger.info("Starting nexus rotation cycle")
|
||||
|
||||
while True:
|
||||
cycle_count += 1
|
||||
logger.info(f"=== Starting cycle #{cycle_count} ===")
|
||||
|
||||
try:
|
||||
# Get next node
|
||||
node = get_next_node(grist, logger)
|
||||
if not node:
|
||||
logger.error("No node available, waiting 60 seconds before retry")
|
||||
time.sleep(60)
|
||||
continue
|
||||
# Get next node
|
||||
node = get_next_node(grist, logger)
|
||||
if not node:
|
||||
logger.error("No node available, waiting 60 seconds before retry")
|
||||
time.sleep(60)
|
||||
return
|
||||
|
||||
node_id = node.NodeID
|
||||
current_hours = int(node.Hours)
|
||||
node_id = node.NodeID
|
||||
current_hours = int(node.Hours)
|
||||
|
||||
# Update hours (+1 before starting container)
|
||||
new_hours = current_hours + 1
|
||||
grist.update(node.id, {"Hours": new_hours}, "Nodes")
|
||||
logger.info(f"Updated node {node_id} hours: {current_hours} -> {new_hours}")
|
||||
# Update hours (+1 before starting container)
|
||||
new_hours = current_hours + 1
|
||||
grist.update(node.id, {"Hours": new_hours}, "Nodes")
|
||||
logger.info(f"Updated node {node_id} hours: {current_hours} -> {new_hours}")
|
||||
|
||||
# Remove any existing container with same name
|
||||
# Remove any existing container with same name
|
||||
stop_and_remove_container(container_name, logger)
|
||||
|
||||
# Start new container
|
||||
if not start_container(container_name, node_id, logger):
|
||||
logger.error(f"Failed to start container for node {node_id}")
|
||||
# Return the hour back since container didn't start
|
||||
grist.update(node.id, {"Hours": current_hours}, "Nodes")
|
||||
logger.info(f"Reverted node {node_id} hours back to: {current_hours}")
|
||||
time.sleep(60)
|
||||
return
|
||||
|
||||
logger.info(f"Container started successfully for node {node_id}")
|
||||
|
||||
# Wait 5 hours with progress updates and health checks every 10 minutes
|
||||
wait_hours = 5
|
||||
total_minutes = wait_hours * 60
|
||||
interval_minutes = 10
|
||||
|
||||
logger.info(f"Waiting {wait_hours} hours ({total_minutes} minutes) for node {node_id}")
|
||||
|
||||
container_failed = False
|
||||
for elapsed_minutes in range(0, total_minutes, interval_minutes):
|
||||
remaining_minutes = total_minutes - elapsed_minutes
|
||||
remaining_hours = remaining_minutes // 60
|
||||
remaining_mins = remaining_minutes % 60
|
||||
|
||||
if elapsed_minutes > 0: # Skip first iteration log
|
||||
logger.info(f"Node {node_id}: {remaining_hours}h {remaining_mins}m remaining")
|
||||
|
||||
# Check container status before sleeping
|
||||
if not check_container_status(container_name, logger):
|
||||
logger.error(f"Container {container_name} is not running, attempting restart")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
|
||||
# Start new container
|
||||
|
||||
if not start_container(container_name, node_id, logger):
|
||||
logger.error(f"Failed to start container for node {node_id}")
|
||||
# Return the hour back since container didn't start
|
||||
grist.update(node.id, {"Hours": current_hours}, "Nodes")
|
||||
logger.info(f"Reverted node {node_id} hours back to: {current_hours}")
|
||||
time.sleep(60)
|
||||
continue
|
||||
logger.error(f"Failed to restart container for node {node_id}")
|
||||
container_failed = True
|
||||
break
|
||||
else:
|
||||
logger.info(f"Container restarted successfully for node {node_id}")
|
||||
|
||||
time.sleep(interval_minutes * 60) # Sleep 10 minutes
|
||||
|
||||
# If container failed during the cycle, skip to next iteration
|
||||
if container_failed:
|
||||
logger.error(f"Container failed during cycle for node {node_id}, moving to next node")
|
||||
return
|
||||
|
||||
logger.info(f"Container started successfully for node {node_id}")
|
||||
|
||||
# Wait 5 hours with progress updates and health checks every 10 minutes
|
||||
wait_hours = 5
|
||||
total_minutes = wait_hours * 60
|
||||
interval_minutes = 10
|
||||
|
||||
logger.info(f"Waiting {wait_hours} hours ({total_minutes} minutes) for node {node_id}")
|
||||
|
||||
container_failed = False
|
||||
for elapsed_minutes in range(0, total_minutes, interval_minutes):
|
||||
remaining_minutes = total_minutes - elapsed_minutes
|
||||
remaining_hours = remaining_minutes // 60
|
||||
remaining_mins = remaining_minutes % 60
|
||||
|
||||
if elapsed_minutes > 0: # Skip first iteration log
|
||||
logger.info(f"Node {node_id}: {remaining_hours}h {remaining_mins}m remaining")
|
||||
|
||||
# Check container status before sleeping
|
||||
if not check_container_status(container_name, logger):
|
||||
logger.error(f"Container {container_name} is not running, attempting restart")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
|
||||
if not start_container(container_name, node_id, logger):
|
||||
logger.error(f"Failed to restart container for node {node_id}")
|
||||
container_failed = True
|
||||
break
|
||||
else:
|
||||
logger.info(f"Container restarted successfully for node {node_id}")
|
||||
|
||||
time.sleep(interval_minutes * 60) # Sleep 10 minutes
|
||||
|
||||
# If container failed during the cycle, skip to next iteration
|
||||
if container_failed:
|
||||
logger.error(f"Container failed during cycle for node {node_id}, moving to next node")
|
||||
continue
|
||||
# Stop and remove container
|
||||
logger.info(f"5 hours completed for node {node_id}, stopping container")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
|
||||
# Stop and remove container
|
||||
logger.info(f"5 hours completed for node {node_id}, stopping container")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
# Update hours (+4 after completion)
|
||||
final_hours = new_hours + 4
|
||||
grist.update(node.id, {"Hours": final_hours}, "Nodes")
|
||||
logger.info(f"Updated node {node_id} final hours: {new_hours} -> {final_hours}")
|
||||
|
||||
# Update hours (+4 after completion)
|
||||
final_hours = new_hours + 4
|
||||
grist.update(node.id, {"Hours": final_hours}, "Nodes")
|
||||
logger.info(f"Updated node {node_id} final hours: {new_hours} -> {final_hours}")
|
||||
logger.info(f"=== Cycle #{cycle_count} completed for node {node_id} ===")
|
||||
|
||||
logger.info(f"=== Cycle #{cycle_count} completed for node {node_id} ===")
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Received keyboard interrupt, stopping rotation")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Cycle #{cycle_count} failed with error: {str(e)}")
|
||||
stop_and_remove_container(container_name, logger)
|
||||
logger.info("Waiting 60 seconds before next attempt")
|
||||
time.sleep(60)
|
||||
|
||||
logger.info("Nexus rotation cycle stopped")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user