# -*- coding: utf-8 -*-

# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2021 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT

"""
This module contains classes implementing X-Ray Agent behaviour
"""
import json
import logging
import os
import re
import signal
import subprocess
import time
from threading import Thread, current_thread, Lock
from typing import Any, Optional, Dict, Tuple
from dataclasses import dataclass

from .apiclient import get_client
from .internal.constants import agent_sock, local_tasks_storage
from .internal.exceptions import XRayError, XRayAPIError
from .internal.fault_detector import FaultDetector
from .internal.local_counters import (
    open_local_storage,
    flush_memory_storage,
    get_task_ids
)
from .internal.types import Task
from .internal.user_plugin_utils import extract_creds
from .internal.utils import (
    read_sys_id,
    create_socket,
    write_sys_id,
    dbm_storage,
    get_current_cpu_throttling_time
)


@dataclass
class APIDataContainer:
    client: 'APIClient'
    task: Task


class Agent:
    """
    X-Ray Agent class
    """

    PIDFILE = '/var/run/xray-agent.pid'

    COUNTERS_FLUSH_INTERVAL = 15
    MONGO_FLUSH_INTERVAL = 60
    CLEANUP_INTERVAL = 43200  # once in 12 hours

    def __init__(self, save_pid=False, background_routine=False):
        # don't process SIGUSR2 with default handler
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})

        self.logger = logging.getLogger('agent')
        self.sys_id = read_sys_id()
        write_sys_id(self.sys_id)
        # initialize ClickHouse API client to send requests data
        clickhouse_client_object = get_client('api_req')
        self.send_client = clickhouse_client_object(system_id=self.sys_id)
        # initialize Adviser API client to send requests data
        adviser_client_object = get_client('adviser')
        self.adviser_client = adviser_client_object()
        # store the MongoDB API client class used to interact with tasks
        self.task_client_object = get_client()
        # initialize storage for cache of remote API data
        self.api_data_cache_lock = Lock()
        self.api_data_cache: Dict[str, APIDataContainer] = dict()
        # initialize Fault Detector
        self.fault_detector = FaultDetector()

        if background_routine:
            # setup signal handlers
            self.signal_handler_thread = Thread(target=self._setup_signal_handler)
            self.signal_handler_thread.start()

            # start periodical database flushing
            self.flusher_thread = Thread(target=self._flusher)
            self.flusher_thread.start()

        if save_pid:
            self._save_pid()

        # start serving incoming connections
        self.listen()

    def _wait_for_sigusr2(self):
        siginfo = signal.sigwaitinfo({signal.SIGUSR2})
        logging.info('Received SIGUSR2 from pid=%s, '
                     'flushing database storage on disk', siginfo.si_pid)

        self._flush_mongodb_counters()
        flush_memory_storage()

        logging.info('Sending signal back to process that requested storage flush')
        try:
            os.kill(siginfo.si_pid, signal.SIGUSR2)
        except OSError:
            logging.warning('Process that requested storage flush no longer exists')

    def _setup_signal_handler(self):
        """
        Set up a SIGUSR2 handler that starts an in-memory
        storage flush when the signal is received.

        When flushed, send SIGUSR2 back to the process that sent the signal.
        """
        signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})
        while True:
            try:
                self._wait_for_sigusr2()
            except Exception:
                logging.exception('Unable to process signal, see traceback for details.')
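
    # Illustrative usage sketch (not part of the original module): an external
    # tool could request an on-disk flush by signalling the agent with SIGUSR2
    # and waiting for the acknowledging SIGUSR2, roughly:
    #
    #   signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})
    #   with open(Agent.PIDFILE) as f:
    #       os.kill(int(f.read()), signal.SIGUSR2)
    #   signal.sigwaitinfo({signal.SIGUSR2})  # agent signals back once flushed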

    def _flush_mongodb_counters(self, task_id=None):
        tasks_to_flush = [task_id] if task_id is not None else get_task_ids()
        for task_id in tasks_to_flush:
            self.logger.info('Flushing task to mongo %s', task_id)

            try:
                apiclient, task = self.get_cached_or_load(task_id)
            except XRayError:
                logging.warning('Unable to get client and task %s', task_id)
                continue

            # read stored request_id
            with open_local_storage(task_id) as storage:
                task.update_with_local_data(next_request_id=storage.next_request_id)

            if task.tracing_by == 'time':
                # for a task traced by time, tracing_count represents the number
                # of minutes of active tracing left and is updated only by the
                # stop|continue task routines
                self.update_counts(apiclient, task.request_count)
            else:
                # for a task traced by request_qty, tracing_count depends on the
                # number of collected requests, thus it should be updated alongside
                self.update_counts(apiclient, task.request_count,
                                   task.tracing_count)

    def _save_pid(self):
        """
        Save the agent process pid to a file that other tools can use.
        """
        with open(self.PIDFILE, 'w') as f:
            f.write(str(os.getpid()))

    def _flusher(self):
        """
        This method flushes data from memory to
        local storage periodically.
        """
        last_mongo_flush_time = 0
        last_api_data_cache_cleanup = 0
        while True:
            time.sleep(self.COUNTERS_FLUSH_INTERVAL)
            if time.time() - last_mongo_flush_time > self.MONGO_FLUSH_INTERVAL:
                self._flush_mongodb_counters()
                flush_memory_storage()
                last_mongo_flush_time = time.time()
                # we should clean up the API data cache only after flushing counters
                # in order not to lose counters for already inactive tasks
                if time.time() - last_api_data_cache_cleanup > self.CLEANUP_INTERVAL:
                    self.cleanup_api_data_cache()
                    last_api_data_cache_cleanup = time.time()
            else:
                flush_memory_storage(remove=False)

    def listen(self) -> None:
        """
        Start listening socket
        """
        _socket = create_socket(agent_sock)
        while True:
            connection, address = _socket.accept()
            t = Thread(target=self.handle, args=(connection,))
            t.start()
            self.logger.debug('[%s] Started', t.name)

    def add_limit_faults(self, data, t_key, cpu_value):
        """
        Calculate throttling values and update given data
        with resulting throttling stat
        """
        data['hitting_limits'], data['throttled_time'] = self.fault_detector(
            t_key, cpu_value)

    def handle(self, connection: 'socket object') -> None:
        """
        Handle incoming connection
        :param connection: socket object usable to
        send and receive data on the connection
        """
        _pid, _uid, _gid = extract_creds(connection)
        current_cpu = get_current_cpu_throttling_time(_uid)
        fileobj = connection.makefile(errors='ignore')
        try:
            input_data = self.read_input(fileobj)
        except json.JSONDecodeError as e:
            self.logger.error('JSON decode failed: %s',
                              str(e), extra={'t_name': current_thread().name})
            connection.close()
            return

        if input_data is None:
            # save current CPU throttling time and timestamp
            self.fault_detector.save(_pid, current_cpu)
            # attempt to flush expired entries
            self.fault_detector.flush()
            connection.close()
            return

        # otherwise calculate throttling fact, add it to data
        # and send gathered stat to CH
        try:
            if input_data.get('hitting_limits') is None:
                # only calculate faults if extension failed to get them itself
                self.add_limit_faults(input_data, _pid, current_cpu)
            self.__call__(input_data)
        except XRayError:
            pass
        finally:
            connection.close()
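
    # Illustrative client-side sketch (assumed extension behaviour, not part of
    # this module): a tracer connects to agent_sock and writes one JSON document
    # per connection; an empty payload acts as a throttling "ping":
    #
    #   import socket
    #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    #   client.connect(agent_sock)
    #   client.sendall(json.dumps(trace_payload).encode())  # trace_payload: dict
    #   client.close()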

    def read_input(self, fileio: 'file object') -> Any:
        """
        Read input data and return decoded json
        :param fileio: a file-like object providing read method
        """
        data = fileio.read()
        self.logger.info('[%s] Received %i bytes of trace data',
                         current_thread().name, len(data.encode()))
        self.logger.debug('[%s] Received data: %s',
                          current_thread().name, data)
        if len(data.strip()) == 0:
            return
        return json.loads(data.strip(), strict=False)

    def __call__(self, decoded_data: dict) -> None:
        """
        Process received data
        """
        self.logger.info('[%s] Processing trace for task %s (%s)',
                         current_thread().name,
                         decoded_data.get('tracing_task_id'),
                         decoded_data.get('url'))
        self.process_request_data(decoded_data)
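
    # Approximate shape of the incoming payload, inferred from the fields this
    # class accesses ('request_id' and 'tracing_task_id' are overwritten before
    # sending, see update_request_data below):
    #
    #   {
    #       "tracing_task_id": "<fake task id>",
    #       "url": "<traced URL>",
    #       "hitting_limits": None,
    #       "data": [{"type": "mysql_query", "query": "SELECT ..."}, ...]
    #   }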

    def instantiate_mongo_client(self,
                                 fake_task_id: str) -> 'APIClient instance':
        """
        Initialize MongoDB client for current task
        """
        try:
            with dbm_storage(local_tasks_storage) as task_storage:
                try:
                    real_id = task_storage[fake_task_id].decode()
                except KeyError:
                    raise XRayError(
                        f"Cannot resolve tracing_task_id: no match found in storage",
                        extra={'id': fake_task_id,
                               'all_ids': task_storage.keys()})
        except RuntimeError as e:
            raise XRayError(f"Cannot resolve tracing_task_id: {str(e)}")

        return self.task_client_object(system_id=self.sys_id,
                                       tracing_task_id=real_id)
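
    # The local dbm storage is assumed to map "fake" task ids (the ones seen in
    # incoming traces) onto real tracing task ids, roughly:
    #   {b'<fake task id>': b'<real tracing_task_id>'}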

    @staticmethod
    def get_task_id(data: dict) -> str:
        """
        Retrieve tracing task ID from the input data
        :return: tracing task ID (fake one)
        """
        return data['tracing_task_id']

    def is_continuous_task(self, task: Task) -> bool:
        """
        Check if task is continuous
        """
        if task.is_continuous:
            self.logger.info('Task with id: %s detected as continuous',
                             str(task.task_id))
            return True
        return False

    def get_cached_or_load(self, fake_task_id: str) -> Tuple['APIClient', Task]:
        """
        Return a client and task from the API data cache, or
        initialize the client, GET the task from MongoDB and add both to the cache
        """
        self.logger.debug('Cached API data: %s', self.api_data_cache)

        cached_data = self.api_data_cache.get(fake_task_id)
        if cached_data is not None:
            return cached_data.client, cached_data.task

        apiclient = self.instantiate_mongo_client(fake_task_id)
        _t = apiclient.get_task()
        self.logger.debug('Adding new container in cache: %s --> %s, %s',
                          fake_task_id, _t, apiclient)
        self.api_data_cache[fake_task_id] = APIDataContainer(client=apiclient,
                                                             task=_t)
        return apiclient, _t

    def cleanup_api_data_cache(self) -> None:
        """
        Clean up the in-memory API data cache dict so that it does not store
        inactive (stopped or already completed) tasks
        """
        try:
            with dbm_storage(local_tasks_storage) as task_storage:
                active_tasks = [k.decode() for k in task_storage.keys()]
        except RuntimeError:
            self.logger.warning(
                'Unable to cleanup cache, storage unavailable')
            return

        for _task in list(self.api_data_cache.keys()):
            with self.api_data_cache_lock:
                if _task in self.api_data_cache and _task not in active_tasks:
                    self.logger.info('Cleaning up inactive container %s', _task)
                    self.api_data_cache.pop(_task)

    def process_request_data(self, request_data: dict) -> None:
        """
        Increment request ID in /usr/share/alt-php-xray/requests/{tracing_task_id} file
        Substitute request_id and tracing_task_id in request_data.
        Send request_data to ClickHouse
        :param request_data: original request data
        """
        t = current_thread().name
        _, task = self.get_cached_or_load(self.get_task_id(request_data))
        logging.info('Is Manual: %s', task.is_manual)
        with open_local_storage(self.get_task_id(request_data),
                                flush=task.is_manual) as storage:
            # read stored request_id
            task.update_with_local_data(next_request_id=storage.next_request_id)

            if task.tracing_count <= 0:
                self.logger.info('Tracing count is 0, nothing should be done')
                return
            # update input data with stored request_id
            updated_request_data = self.update_request_data(request_data,
                                                            task)
            # send data with updated ids
            self.logger.info('[%s] Sending to ClickHouse', t)
            self.send_client(updated_request_data)
            try:
                self.logger.info('[%s] Sending to SmartAdvice', t)
                self.adviser_client(updated_request_data)
            except XRayAPIError:
                # ignore all errors occurring within smart advice
                # microservice intercommunication
                pass
            # then increment request_id counter
            storage.next_request_id += 1
            # locally recalculate how many requests are left to process
            task.update_with_local_data(next_request_id=storage.next_request_id)

        if task.is_manual:
            self._flush_mongodb_counters(task.fake_id)
        if task.tracing_by != 'time' and task.tracing_count <= 0:
            self.complete_task(task)

    def update_request_data(self, data: dict, task: Task) -> dict:
        """
        Substitute request_id and tracing_task_id
        :param data: original input
        :param task: a Task instance
        :return: updated input
        """
        data['request_id'] = task.request_count + 1
        data['tracing_task_id'] = task.task_id
        for item in data['data']:
            item['request_id'] = task.request_count + 1
            item['tracing_task_id'] = task.task_id
            if item['type'] == 'mysql_query':
                item['query'] = self.hide_symbols(item['query'])

        self.logger.info('[%s] Input updated => %s & %s',
                         current_thread().name,
                         data.get('tracing_task_id'), data.get('request_id'))
        self.logger.debug('[%s] Full updated input %s',
                          current_thread().name, data)
        return data

    def update_counts(self, client: 'APIClient instance',
                      request_count: int,
                      tracing_count: Optional[int] = None) -> None:
        """
        Update task counters in mongodb instance
        """
        client.update_counts_only(tracing_count=tracing_count,
                                  request_count=request_count)

    def complete_task(self, _task: Task) -> None:
        """
        Stop and complete request_qty task
        :param _task: tracing task to stop
        """
        self.logger.info('[%s] Task %s should be completed',
                         current_thread().name,
                         _task.task_id)
        # delay to let MongoDB process recently received counts (see XRAY-87)
        time.sleep(1)
        self._run_complete_task_cmd(_task.task_id)

    def _run_complete_task_cmd(self, task_id):
        subprocess.check_output([
            'cloudlinux-xray-manager', 'stop',
            '--system_id', self.sys_id,
            '--tracing_task_id', task_id
        ])

    @staticmethod
    def hide_symbols(mysql_query: str) -> str:
        """
        Sanitize data enclosed in quotes in a MySQL query
        """
        def replacer(m):
            """
            Works with whole string in single or double quotes
            """
            q = m.group('quote')
            t = m.group('trunc')

            def inner_repl(inner_m):
                """
                Works with characters inside quotes
                """
                if inner_m.group('digit'):
                    return '0'
                elif inner_m.group('symbol'):
                    return 'x'

            sanitized = re.sub(r"((?P<digit>\d)|(?P<symbol>[^0-9_:;\-/',. \\]))",
                               inner_repl, m.group('in_quote'))
            # wrap sanitized string back with originally detected characters
            # (quotes/truncation marker)
            return f'{q}{sanitized}{t or q}'

        # the string is either wrapped in quotes (single or double) or starts
        # with a quote and ends with ... (a truncation marker); quotes escaped
        # with either / or \ are not treated as closing quotes
        pattern = re.compile(r"""(?P<quote>['"])(?P<in_quote>.*?)((?<![\\|/])(?P=quote)|(?P<trunc>\.{3}))""")
        return re.sub(pattern, replacer, mysql_query)
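
    # Illustrative example of the sanitization above (assumed behaviour, not
    # from the original source): digits inside quotes become '0' and most other
    # quoted characters become 'x', e.g.
    #   hide_symbols("SELECT 1 FROM users WHERE login = 'John' AND pin = '1234'")
    #   -> "SELECT 1 FROM users WHERE login = 'xxxx' AND pin = '0000'"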