Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try adding strace #3505

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import shutil
import signal
import subprocess
import sys
import threading
import time
import traceback

from django.http import HttpResponse
from django.http import HttpResponse, FileResponse
from django.urls import path


Expand All @@ -21,6 +24,33 @@ def handle_sigterm(signo, sf):
# NOTE(review): presumably Django settings for this single-file test app — confirm.
SECRET_KEY = "fdsfdasfa"
ALLOWED_HOSTS = ["*"]

# File that `strace -o` writes to (see start_strace) and that the
# download_strace view reads back.
STRACE_OUTPUT_FILE = "/tmp/strace_output.log"
# 100 KiB. Not referenced anywhere in the visible part of this file.
MAX_RESPONSE_SIZE = 100 * 1024
# Popen handle of the running strace; assigned by start_strace, None until then.
strace_process = None

# Function to run `strace` in a background thread
def start_strace():
    """Attach `strace` to this process and block until strace exits.

    Runs `strace -f -o STRACE_OUTPUT_FILE -p <own pid>`, publishes the
    resulting Popen handle in the module-level `strace_process`, then waits
    on it. Any exception raised while launching or waiting is reported on
    stderr instead of propagating.
    """
    global strace_process
    own_pid = str(os.getpid())
    strace_argv = ["strace", "-f", "-o", STRACE_OUTPUT_FILE, "-p", own_pid]
    try:
        strace_process = subprocess.Popen(strace_argv)
        # Blocks the calling thread for the lifetime of the strace process.
        strace_process.wait()
    except Exception as exc:
        print(f"Error running strace: {exc}", file=sys.stderr)


# Start the `strace` thread
def start_strace_thread():
    """Run `start_strace` on a daemon thread and return without waiting."""
    threading.Thread(target=start_strace, daemon=True).start()


# Executed at module import time: tracing of this process is started as soon
# as the module is loaded, before any request is served.
start_strace_thread()



def index(request):
    """Return a fixed "test" response; `request` is not inspected."""
    response = HttpResponse("test")
    return response
Expand Down Expand Up @@ -56,12 +86,19 @@ def child_pids(request):
status_path = f"/proc/{pid}/status"
try:
with open(status_path, "r") as status_file:
ppid = None
name = None
for line in status_file:
if line.startswith("PPid:"):
ppid = int(line.split()[1])
if ppid == current_pid:
child_pids.append(pid)
if line.startswith("Name:"):
name = line.split()[1]
if ppid is not None and name is not None:
break

# Check if the process is a child and not named "strace"
if ppid == current_pid and name != "strace":
child_pids.append(pid)
except (FileNotFoundError, PermissionError):
# Process might have terminated or we don't have permission
continue
Expand Down Expand Up @@ -109,10 +146,62 @@ def zombies(request):
return HttpResponse(f"Error: {str(e)}", status=500, content_type="text/plain")


def kill_strace(request):
    """Send SIGTERM to every process whose `/proc/<pid>/comm` is "strace".

    Scans the numeric entries of /proc, reads each process name, and signals
    the ones named exactly "strace". Processes that disappear or cannot be
    read or signalled are skipped. Returns a plain-text confirmation, or a
    500 response carrying the traceback if anything else goes wrong.
    """
    skippable = (FileNotFoundError, ProcessLookupError, PermissionError)
    try:
        for entry in os.listdir("/proc"):
            # Only numeric directory names are PIDs.
            if not entry.isdigit():
                continue
            try:
                with open(f"/proc/{entry}/comm", "r") as comm:
                    comm_name = comm.read().strip()
                if comm_name != "strace":
                    continue
                print(f"Killing strace process with PID: {entry}")
                os.kill(int(entry), signal.SIGTERM)
            except skippable:
                # Process already gone, or not ours to inspect/signal.
                continue

        return HttpResponse("strace processes terminated successfully.", content_type="text/plain")
    except Exception:
        # Hand the full traceback back to the caller for debugging.
        return HttpResponse(
            "An error occurred:\n\n" + traceback.format_exc(),
            content_type="text/plain",
            status=500,
        )

def download_strace(request):
    """Return the strace log as plain text, prefixed with its size in bytes.

    The body is ``"File size: <n> bytes\\n\\n<log contents>"``.

    Returns:
        200 with the log when STRACE_OUTPUT_FILE can be read,
        404 with "Strace file not found." when it does not exist,
        500 with the formatted traceback on any other failure.
    """
    try:
        # Open directly instead of checking os.path.exists() first: the file
        # can vanish between a check and the open, which previously surfaced
        # as a 500 rather than the intended 404.
        # errors="replace" keeps a stray undecodable byte in the log from
        # turning this debugging endpoint into a 500.
        with open(STRACE_OUTPUT_FILE, "r", errors="replace") as log_file:
            # Size is taken from the open descriptor so it refers to the very
            # file being read, not whatever the path points to a moment later.
            original_file_size = os.fstat(log_file.fileno()).st_size
            content = log_file.read()

        response_content = f"File size: {original_file_size} bytes\n\n{content}"
        return HttpResponse(response_content, content_type="text/plain")
    except FileNotFoundError:
        return HttpResponse("Strace file not found.", status=404, content_type="text/plain")
    except Exception:
        # Hand the full traceback back to the caller for debugging.
        error_details = traceback.format_exc()
        return HttpResponse(
            f"An error occurred:\n\n{error_details}",
            content_type="text/plain",
            status=500,
        )


# Route table: each URL path is mapped to the view function of the same name
# ("" maps to index). download_strace and kill_strace expose the strace log
# and a way to terminate the tracer.
urlpatterns = [
path("", index),
path("crashme", crashme),
path("fork_and_crash", fork_and_crash),
path("child_pids", child_pids),
path("zombies", zombies),
path("download_strace", download_strace),
path("kill_strace", kill_strace),
]
21 changes: 19 additions & 2 deletions tests/auto_inject/test_auto_inject_install.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import re
from utils import scenarios, features, flaky, irrelevant, context
from utils.tools import logger
from utils.onboarding.weblog_interface import warmup_weblog, get_child_pids, get_zombies, fork_and_crash
from utils.onboarding.weblog_interface import warmup_weblog, get_child_pids, get_zombies, fork_and_crash, get_strace, kill_strace
from utils import scenarios, features
import tests.auto_inject.utils as base
from utils.virtual_machine.utils import parametrize_virtual_machines
import time


@features.host_auto_installation_script
Expand Down Expand Up @@ -96,7 +97,6 @@ class TestContainerAutoInjectInstallScriptCrashTracking_NoZombieProcess(base.Aut
],
reason="Zombies only appears in containers",
)
@flaky(library="python", reason="APMLP-313")
def test_crash_no_zombie(self, virtual_machine):
vm_ip = virtual_machine.get_ip()
vm_port = virtual_machine.deffault_open_port
Expand All @@ -122,6 +122,8 @@ def test_crash_no_zombie(self, virtual_machine):
logger.warning("Failure process tree: " + process_tree)
raise

time.sleep(5)

# At this point, there should be no zombies and no child pids
child_pids = get_child_pids(virtual_machine).strip()

Expand All @@ -130,6 +132,21 @@ def test_crash_no_zombie(self, virtual_machine):
process_tree = self.execute_command(virtual_machine, "ps aux --forest")
logger.warning("Failure process tree: " + process_tree)

# download the strace file
try:
killed = kill_strace(virtual_machine)
logger.warning("Killing strace: " + killed)

process_tree = self.execute_command(virtual_machine, "ps aux --forest")
logger.warning("Process tree after killing strace: " + process_tree)

strace = get_strace(virtual_machine)
logger.warning("Strace output: " + strace)
except:
process_tree = self.execute_command(virtual_machine, "ps aux --forest")
logger.warning("Process tree after strace: " + process_tree)
raise

assert child_pids == ""

zombies = get_zombies(virtual_machine).strip()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ services:
image: system-tests/local:latest
ports:
- 5985:18080
cap_add:
- SYS_PTRACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ FROM public.ecr.aws/docker/library/python:3.12
ENV PYTHONUNBUFFERED 1
ENV DJANGO_SETTINGS_MODULE django_app
WORKDIR /src

RUN apt-get update && apt-get install -y strace

ADD . /src
RUN pip install django
EXPOSE 18080
Expand Down
14 changes: 14 additions & 0 deletions utils/onboarding/weblog_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,20 @@ def get_child_pids(virtual_machine) -> str:
return requests.get(url, timeout=60).text


def get_strace(virtual_machine) -> str:
    """GET the weblog's `/download_strace` endpoint and return the body text.

    The host and port come from `virtual_machine`; the request times out
    after 240 seconds.
    """
    host = virtual_machine.get_ip()
    port = virtual_machine.deffault_open_port
    response = requests.get(f"http://{host}:{port}/download_strace", timeout=240)
    return response.text


def kill_strace(virtual_machine) -> str:
    """GET the weblog's `/kill_strace` endpoint and return the body text.

    The host and port come from `virtual_machine`; the request times out
    after 30 seconds.
    """
    host = virtual_machine.get_ip()
    port = virtual_machine.deffault_open_port
    response = requests.get(f"http://{host}:{port}/kill_strace", timeout=30)
    return response.text


def get_zombies(virtual_machine) -> str:
vm_ip = virtual_machine.get_ip()
vm_port = virtual_machine.deffault_open_port
Expand Down
Loading