# -*- coding: utf-8 -*-
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2021 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
"""
This module contains classes implementing X-Ray Agent behaviour
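
A rough usage sketch (illustrative; the real entry point may wire this up
differently): constructing the agent with background_routine=True starts the
SIGUSR2 handler and periodic flusher threads, save_pid=True writes the
pidfile, and the constructor then blocks in listen(), serving connections
on agent_sock:

    Agent(save_pid=True, background_routine=True)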
"""
import json
import logging
import os
import re
import signal
import subprocess
import time
from threading import Thread, current_thread, Lock
from typing import Any, Optional, Dict, Tuple
from dataclasses import dataclass
from .apiclient import get_client
from .internal.constants import agent_sock, local_tasks_storage
from .internal.exceptions import XRayError, XRayAPIError
from .internal.fault_detector import FaultDetector
from .internal.local_counters import (
open_local_storage,
flush_memory_storage,
get_task_ids
)
from .internal.types import Task
from .internal.user_plugin_utils import extract_creds
from .internal.utils import (
read_sys_id,
create_socket,
write_sys_id,
dbm_storage,
get_current_cpu_throttling_time
)
@dataclass
class APIDataContainer:
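    """Cached pair: a task-bound API client and the Task it serves"""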
client: 'APIClient'
task: Task
class Agent:
"""
    X-Ray Agent: listens on a unix socket for traces sent by the X-Ray
    extension, enriches them with CPU throttling stats and forwards them
    to ClickHouse and SmartAdvice, keeping task counters in sync with
    MongoDB
"""
PIDFILE = '/var/run/xray-agent.pid'
COUNTERS_FLUSH_INTERVAL = 15
MONGO_FLUSH_INTERVAL = 60
    CLEANUP_INTERVAL = 43200  # once every 12 hours
def __init__(self, save_pid=False, background_routine=False):
# don't process SIGUSR2 with default handler
signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})
self.logger = logging.getLogger('agent')
self.sys_id = read_sys_id()
write_sys_id(self.sys_id)
# initialize ClickHouse API client to send requests data
clickhouse_client_object = get_client('api_req')
self.send_client = clickhouse_client_object(system_id=self.sys_id)
# initialize Adviser API client to send requests data
adviser_client_object = get_client('adviser')
self.adviser_client = adviser_client_object()
        # MongoDB API client class; instantiated per task to interact with tasks
self.task_client_object = get_client()
# initialize storage for cache of remote API data
self.api_data_cache_lock = Lock()
self.api_data_cache: Dict[str, APIDataContainer] = dict()
# initialize Fault Detector
self.fault_detector = FaultDetector()
if background_routine:
# setup signal handlers
self.signal_handler_thread = Thread(target=self._setup_signal_handler)
self.signal_handler_thread.start()
# start periodical database flushing
self.flusher_thread = Thread(target=self._flusher)
self.flusher_thread.start()
if save_pid:
self._save_pid()
        # start serving incoming connections
self.listen()
def _wait_for_sigusr2(self):
siginfo = signal.sigwaitinfo({signal.SIGUSR2})
logging.info('Received SIGUSR2 from pid=%s, '
'flushing database storage on disk', siginfo.si_pid)
self._flush_mongodb_counters()
flush_memory_storage()
logging.info('Sending signal back to process that requested storage flush')
try:
os.kill(siginfo.si_pid, signal.SIGUSR2)
except OSError:
logging.warning('Process that requested storage flush no longer exists')
def _setup_signal_handler(self):
"""
        Set up a SIGUSR2 handler that flushes the in-memory
        storage when the signal is received.
        Once flushed, send SIGUSR2 back to the process that sent the signal.
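
        A peer process can request a flush roughly like this (illustrative
        sketch; it assumes the agent pid is read from Agent.PIDFILE):

            import os, signal
            signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})
            with open('/var/run/xray-agent.pid') as f:
                os.kill(int(f.read()), signal.SIGUSR2)
            # wait up to 30 seconds for the agent to confirm the flush
            signal.sigtimedwait({signal.SIGUSR2}, 30)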
"""
signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGUSR2})
while True:
try:
self._wait_for_sigusr2()
except Exception:
logging.exception('Unable to process signal, see traceback for details.')
def _flush_mongodb_counters(self, task_id=None):
tasks_to_flush = [task_id] if task_id is not None else get_task_ids()
for task_id in tasks_to_flush:
self.logger.info('Flushing task to mongo %s', task_id)
try:
apiclient, task = self.get_cached_or_load(task_id)
except XRayError:
logging.warning('Unable to get client and task %s', task_id)
continue
# read stored request_id
with open_local_storage(task_id) as storage:
task.update_with_local_data(next_request_id=storage.next_request_id)
if task.tracing_by == 'time':
                # tracing_count for a task traced by time represents the number
                # of minutes of active tracing left and is updated by the
                # stop|continue task routines only
self.update_counts(apiclient, task.request_count)
else:
                # tracing_count for a task traced by request_qty depends on the
                # number of collected requests, thus it should be updated alongside
self.update_counts(apiclient, task.request_count,
task.tracing_count)
def _save_pid(self):
"""
        Save the agent process pid to a file that other tools can use.
"""
with open(self.PIDFILE, 'w') as f:
f.write(str(os.getpid()))
def _flusher(self):
"""
This method flushes data from memory to
local storage periodically.
"""
last_mongo_flush_time = 0
last_api_data_cache_cleanup = 0
while True:
time.sleep(self.COUNTERS_FLUSH_INTERVAL)
if time.time() - last_mongo_flush_time > self.MONGO_FLUSH_INTERVAL:
self._flush_mongodb_counters()
flush_memory_storage()
last_mongo_flush_time = time.time()
# we should cleanup API data cache only after flushing counters
# in order not to lose counters for already inactive tasks
if time.time() - last_api_data_cache_cleanup > self.CLEANUP_INTERVAL:
self.cleanup_api_data_cache()
last_api_data_cache_cleanup = time.time()
else:
flush_memory_storage(remove=False)
def listen(self) -> None:
"""
        Start the listening socket and serve each incoming connection in a separate thread
"""
_socket = create_socket(agent_sock)
while True:
connection, address = _socket.accept()
t = Thread(target=self.handle, args=(connection,))
t.start()
self.logger.debug('[%s] Started', t.name)
def add_limit_faults(self, data, t_key, cpu_value):
"""
Calculate throttling values and update given data
with resulting throttling stat
"""
data['hitting_limits'], data['throttled_time'] = self.fault_detector(
t_key, cpu_value)
def handle(self, connection: 'socket object') -> None:
"""
Handle incoming connection
:param connection: socket object usable to
send and receive data on the connection
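
        A minimal client sketch (illustrative; field values are made up, only
        the keys mirror what this module expects): connect to agent_sock (the
        unix socket this agent listens on), send the JSON trace and close the
        writing side so read_input() sees EOF. An empty payload merely
        registers the sender's CPU throttling baseline.

            import json, socket
            s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            s.connect(agent_sock)
            s.sendall(json.dumps({'tracing_task_id': 'fake-id',
                                  'url': '/index.php',
                                  'data': []}).encode())
            s.shutdown(socket.SHUT_WR)
            s.close()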
"""
_pid, _uid, _gid = extract_creds(connection)
current_cpu = get_current_cpu_throttling_time(_uid)
fileobj = connection.makefile(errors='ignore')
try:
input_data = self.read_input(fileobj)
except json.JSONDecodeError as e:
self.logger.error('JSON decode failed: %s',
str(e), extra={'t_name': current_thread().name})
connection.close()
return
if input_data is None:
# save current CPU throttling time and timestamp
self.fault_detector.save(_pid, current_cpu)
# attempt to flush expired entries
self.fault_detector.flush()
connection.close()
return
# otherwise calculate throttling fact, add it to data
# and send gathered stat to CH
try:
if input_data.get('hitting_limits') is None:
# only calculate faults if extension failed to get them itself
self.add_limit_faults(input_data, _pid, current_cpu)
self.__call__(input_data)
except XRayError:
pass
finally:
connection.close()
def read_input(self, fileio: 'file object') -> Any:
"""
Read input data and return decoded json
:param fileio: a file-like object providing read method
"""
data = fileio.read()
self.logger.info('[%s] I received %i bytes trace',
current_thread().name, len(data.encode()))
self.logger.debug('[%s] I received data: %s',
current_thread().name, data)
if len(data.strip()) == 0:
return
return json.loads(data.strip(), strict=False)
def __call__(self, decoded_data: dict) -> None:
"""
Process received data
"""
self.logger.info('[%s] Processing trace for task %s (%s)',
current_thread().name,
decoded_data.get('tracing_task_id'),
decoded_data.get('url'))
self.process_request_data(decoded_data)
def instantiate_mongo_client(self,
fake_task_id: str) -> 'APIClient instance':
"""
Initialize MongoDB client for current task
"""
try:
with dbm_storage(local_tasks_storage) as task_storage:
try:
real_id = task_storage[fake_task_id].decode()
except KeyError:
raise XRayError(
f"Cannot resolve tracing_task_id: no match found in storage",
extra={'id': fake_task_id,
'all_ids': task_storage.keys()})
except RuntimeError as e:
raise XRayError(f"Cannot resolve tracing_task_id: {str(e)}")
return self.task_client_object(system_id=self.sys_id,
tracing_task_id=real_id)
@staticmethod
def get_task_id(data: dict) -> str:
"""
Retrieve tracing task ID from the input data
:return: tracing task ID (fake one)
"""
return data['tracing_task_id']
def is_continuous_task(self, task: Task) -> bool:
"""
Check if task is continuous
"""
if task.is_continuous:
self.logger.info('Task with id: %s detected as continuous',
str(task.task_id))
return True
return False
def get_cached_or_load(self, fake_task_id: str) -> Tuple['APIClient', Task]:
"""
        Return the client and task from the API data cache, or initialize
        the client, GET the task from MongoDB and add both to the cache
"""
self.logger.debug('Cached API data: %s', self.api_data_cache)
cached_data = self.api_data_cache.get(fake_task_id)
if cached_data is not None:
return cached_data.client, cached_data.task
apiclient = self.instantiate_mongo_client(fake_task_id)
_t = apiclient.get_task()
self.logger.debug('Adding new container in cache: %s --> %s, %s',
fake_task_id, _t, apiclient)
self.api_data_cache[fake_task_id] = APIDataContainer(client=apiclient,
task=_t)
return apiclient, _t
def cleanup_api_data_cache(self) -> None:
"""
        Clean up the in-memory API data cache so that inactive
        (stopped or already completed) tasks are not kept there
"""
try:
with dbm_storage(local_tasks_storage) as task_storage:
active_tasks = [k.decode() for k in task_storage.keys()]
except RuntimeError:
self.logger.warning(
'Unable to cleanup cache, storage unavailable')
return
for _task in list(self.api_data_cache.keys()):
with self.api_data_cache_lock:
if _task in self.api_data_cache and _task not in active_tasks:
self.logger.info('Cleaning up inactive container %s', _task)
self.api_data_cache.pop(_task)
def process_request_data(self, request_data: dict) -> None:
"""
Increment request ID in /usr/share/alt-php-xray/requests/{tracing_task_id} file
Substitute request_id and tracing_task_id in request_data.
Send request_data to ClickHouse
:param request_data: original request data
"""
t = current_thread().name
_, task = self.get_cached_or_load(self.get_task_id(request_data))
logging.info('Is Manual: %s', task.is_manual)
with open_local_storage(self.get_task_id(request_data),
flush=task.is_manual) as storage:
# read stored request_id
task.update_with_local_data(next_request_id=storage.next_request_id)
if task.tracing_count <= 0:
self.logger.info('Tracing count is 0, nothing should be done')
return
# update input data with stored request_id
updated_request_data = self.update_request_data(request_data,
task)
# send data with updated ids
self.logger.info('[%s] Sending to ClickHouse', t)
self.send_client(updated_request_data)
try:
self.logger.info('[%s] Sending to SmartAdvice', t)
self.adviser_client(updated_request_data)
except XRayAPIError:
# ignore all errors occurring within smart advice
# microservice intercommunication
pass
# then increment request_id counter
storage.next_request_id += 1
            # locally recalculate how many requests are left to process
task.update_with_local_data(next_request_id=storage.next_request_id)
if task.is_manual:
self._flush_mongodb_counters(task.fake_id)
if task.tracing_by != 'time' and task.tracing_count <= 0:
self.complete_task(task)
def update_request_data(self, data: dict, task: Task) -> dict:
"""
Substitute request_id and tracing_task_id
:param data: original input
:param task: a Task instance
:return: updated input
"""
data['request_id'] = task.request_count + 1
data['tracing_task_id'] = task.task_id
for item in data['data']:
item['request_id'] = task.request_count + 1
item['tracing_task_id'] = task.task_id
if item['type'] == 'mysql_query':
item['query'] = self.hide_symbols(item['query'])
self.logger.info('[%s] Input updated => %s & %s',
current_thread().name,
data.get('tracing_task_id'), data.get('request_id'))
self.logger.debug('[%s] Full updated input %s',
current_thread().name, data)
return data
def update_counts(self, client: 'APIClient instance',
request_count: int,
tracing_count: Optional[int] = None) -> None:
"""
        Update task counters in the MongoDB instance
"""
client.update_counts_only(tracing_count=tracing_count,
request_count=request_count)
def complete_task(self, _task: Task) -> None:
"""
Stop and complete request_qty task
:param _task: tracing task to stop
"""
self.logger.info('[%s] Task %s should be completed',
current_thread().name,
_task.task_id)
        # delay to let MongoDB process recently received counts (see XRAY-87)
time.sleep(1)
self._run_complete_task_cmd(_task.task_id)
def _run_complete_task_cmd(self, task_id):
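        """
        Invoke cloudlinux-xray-manager to stop the given tracing task
        """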
subprocess.check_output([
'cloudlinux-xray-manager', 'stop',
'--system_id', self.sys_id,
'--tracing_task_id', task_id
])
@staticmethod
def hide_symbols(mysql_query: str) -> str:
"""
        Sanitize data inside single or double quotes in a MySQL query
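
        Example (illustrative): digits inside a quoted literal become '0' and
        most other characters become 'x', so

            SELECT * FROM t WHERE name = 'John85'

        turns into

            SELECT * FROM t WHERE name = 'xxxx00'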
"""
def replacer(m):
"""
Works with whole string in single or double quotes
"""
q = m.group('quote')
t = m.group('trunc')
def inner_repl(inner_m):
"""
Works with characters inside quotes
"""
if inner_m.group('digit'):
return '0'
elif inner_m.group('symbol'):
return 'x'
sanitized = re.sub(r"((?P<digit>\d)|(?P<symbol>[^0-9_:;\-/',. \\]))",
inner_repl, m.group('in_quote'))
# wrap sanitized string back with originally detected characters
# (quotes/truncation marker)
return f'{q}{sanitized}{t or q}'
        # the string is either wrapped in quotes (single or double) or
        # starts with a quote and ends with ... (a truncation marker),
        # including quotes escaped with either / or \
pattern = re.compile(r"""(?P<quote>['"])(?P<in_quote>.*?)((?<![\\|/])(?P=quote)|(?P<trunc>\.{3}))""")
return re.sub(pattern, replacer, mysql_query)
|