
Ticket 50784 - performance testing scripts

Bug Description: Everyone loves things to be fast. To achieve that, we need
metrics and observability into the server so we know what to change.

Fix Description: This adds Python-based test runners that can set up and
trigger ldclt against remote hosts.
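
As a usage sketch (not part of the commit itself): the runner below reads its
target from the PERF_TARGET_HOST / PERF_TARGET_PORT environment variables, so
one way to drive it from a controller machine with pytest and lib389 installed
might look like the following. The host value and the pytest invocation are
illustrative assumptions.

    # Hedged sketch: drive the new perf runner against a remote host.
    # PERF_TARGET_HOST / PERF_TARGET_PORT are read by search_performance_test.py;
    # the host value and the use of pytest.main() here are assumptions.
    import os
    import pytest

    os.environ['PERF_TARGET_HOST'] = 'perf-target.example.com'  # hypothetical host
    os.environ['PERF_TARGET_PORT'] = '389'                       # matches the test's default
    pytest.main(['-v', 'dirsrvtests/tests/perf/search_performance_test.py'])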

For those remote hosts, this adds support for building with systemtap probes
activated (--enable-systemtap); these probes can then be hooked by the scripts
in profiling/stap/ to get histograms of function latency and timing.
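
The stap scripts take the path of the instrumented binary as their first
argument (@1) and print their histograms when the probe ends. A minimal, hedged
way to attach one, assuming systemtap is installed, the server was built with
--enable-systemtap, and ns-slapd is at its usual sbin path:

    # Hedged sketch: attach a profiling/stap script to the instrumented server.
    # The script name and its @1 argument come from this commit; the ns-slapd
    # path and the use of subprocess here are assumptions.
    import subprocess

    subprocess.run([
        'stap',
        'profiling/stap/probe_op_shared_search.stp',
        '/usr/sbin/ns-slapd',  # assumed install path of the instrumented binary
    ])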

This also adds the debug info needed to use mutrace
(http://0pointer.de/blog/projects/mutrace.html) to identify lock contention.

https://pagure.io/389-ds-base/issue/50784

Author: William Brown <william@blackhats.net.au>

Review by: tbordaz, mreynolds (Thanks!)
Tag: debian/1.4.3.2-1
Commit: 938fb3478b by William Brown (firstyear), 3 years ago
12 changed files with 437 additions and 12 deletions:

 1. Makefile.am (+2, -1)
 2. configure.ac (+14, -2)
 3. dirsrvtests/tests/perf/search_performance_test.py (+161, -0)
 4. ldap/servers/slapd/log.c (+16, -0)
 5. ldap/servers/slapd/opshared.c (+20, -0)
 6. ldap/servers/slapd/search.c (+16, -0)
 7. profiling/stap/probe_do_search_detail.stp (+64, -0)
 8. profiling/stap/probe_log_access_detail.stp (+51, -0)
 9. profiling/stap/probe_op_shared_search.stp (+64, -0)
10. src/lib389/lib389/__init__.py (+10, -1)
11. src/lib389/lib389/ldclt.py (+19, -7)
12. src/lib389/lib389/topologies.py (+0, -1)

Makefile.am (+2, -1)

@@ -29,6 +29,7 @@ SYSTEMD_DEFINES = @systemd_defs@
CMOCKA_INCLUDES = $(CMOCKA_CFLAGS)

PROFILING_DEFINES = @profiling_defs@
+SYSTEMTAP_DEFINES = @systemtap_defs@
NSPR_INCLUDES = $(NSPR_CFLAGS)

# Rust inclusions.
@@ -142,7 +143,7 @@ PATH_DEFINES = -DLOCALSTATEDIR="\"$(localstatedir)\"" -DSYSCONFDIR="\"$(sysconfd
# Now that we have all our defines in place, setup the CPPFLAGS

# These flags are the "must have" for all components
-AM_CPPFLAGS = $(DEBUG_DEFINES) $(PROFILING_DEFINES) $(RUST_DEFINES)
+AM_CPPFLAGS = $(DEBUG_DEFINES) $(PROFILING_DEFINES) $(SYSTEMTAP_DEFINES) $(RUST_DEFINES)
AM_CFLAGS = $(DEBUG_CFLAGS) $(GCCSEC_CFLAGS) $(ASAN_CFLAGS) $(MSAN_CFLAGS) $(TSAN_CFLAGS) $(UBSAN_CFLAGS)
AM_CXXFLAGS = $(DEBUG_CXXFLAGS) $(GCCSEC_CFLAGS) $(ASAN_CFLAGS) $(MSAN_CFLAGS) $(TSAN_CFLAGS) $(UBSAN_CFLAGS)
# Flags for Directory Server


configure.ac (+14, -2)

@@ -113,8 +113,8 @@ AC_ARG_ENABLE(debug, AS_HELP_STRING([--enable-debug], [Enable debug features (de
AC_MSG_RESULT($enable_debug)
if test "$enable_debug" = yes ; then
debug_defs="-DDEBUG -DMCC_DEBUG"
-debug_cflags="-g3 -O0"
-debug_cxxflags="-g3 -O0"
+debug_cflags="-g3 -O0 -rdynamic"
+debug_cxxflags="-g3 -O0 -rdynamic"
debug_rust_defs="-C debuginfo=2"
cargo_defs=""
rust_target_dir="debug"
@@ -253,6 +253,18 @@ fi
AC_SUBST([profiling_defs])
AC_SUBST([profiling_links])

+AC_MSG_CHECKING(for --enable-systemtap)
+AC_ARG_ENABLE(systemtap, AS_HELP_STRING([--enable-systemtap], [Enable systemtap probe features (default: no)]),
+              [], [ enable_systemtap=no ])
+AC_MSG_RESULT($enable_systemtap)
+if test "$enable_systemtap" = yes ; then
+    systemtap_defs="-DSYSTEMTAP"
+else
+    systemtap_defs=""
+fi
+AC_SUBST([systemtap_defs])


# these enables are for optional or experimental features
AC_MSG_CHECKING(for --enable-pam-passthru)
AC_ARG_ENABLE(pam-passthru,


dirsrvtests/tests/perf/search_performance_test.py (+161, -0)

@@ -0,0 +1,161 @@
# --- BEGIN COPYRIGHT BLOCK ---
# Copyright (C) 2018 Red Hat, Inc.
# Copyright (C) 2019 William Brown <william@blackhats.net.au>
# All rights reserved.
#
# License: GPL (version 3 or any later version).
# See LICENSE for details.
# --- END COPYRIGHT BLOCK ---
#

# Performance tests look different from other tests: they require some extra
# environment settings.

import ldap
import os
from lib389 import DirSrv
from lib389._constants import DEFAULT_SUFFIX

from lib389.topologies import topology_st as topology

from lib389.idm.domain import Domain
from lib389.idm.group import Groups
from lib389.idm.user import nsUserAccounts
from lib389.backend import Backends

from lib389.ldclt import Ldclt
import time

# We want to write a CSV such as:
# category,1 thread,4 thread,8 thread,16 thread
# testcategory,500,800,1000,2000
# testcategory2,500,800,1000,2000
TEST_MARKER = 'configured: search_performance_test.py'
# GROUP_MAX = 4000
# USER_MAX = 6000

GROUP_MAX = 4000
USER_MAX = 6000

TARGET_HOST = os.environ.get('PERF_TARGET_HOST', 'localhost')
TARGET_PORT = os.environ.get('PERF_TARGET_PORT', '389')

def assert_data_present(inst):
    # Do we have the backend marker?
    d = Domain(inst, DEFAULT_SUFFIX)
    try:
        desc = d.get_attr_val_utf8('description')
        if desc == TEST_MARKER:
            return
    except:
        # Just reset everything.
        pass
    # Reset the backends
    bes = Backends(inst)
    try:
        be = bes.get(DEFAULT_SUFFIX)
        be.delete()
    except:
        pass

    be = bes.create(properties={
        'nsslapd-suffix': DEFAULT_SUFFIX,
        'cn': 'userRoot',
    })
    be.create_sample_entries('001004002')

    # Load our data
    # We can't use dbgen as that relies on local access :(

    # Add GROUP_MAX (4,000) groups
    groups = Groups(inst, DEFAULT_SUFFIX)
    for i in range(1, GROUP_MAX):
        rdn = 'group_{0:07d}'.format(i)
        groups.create(properties={
            'cn': rdn,
        })

    # Add USER_MAX (6,000) users
    users = nsUserAccounts(inst, DEFAULT_SUFFIX)
    for i in range(1, USER_MAX):
        rdn = 'user_{0:07d}'.format(i)
        users.create(properties={
            'uid': rdn,
            'cn': rdn,
            'displayName': rdn,
            'uidNumber': '%s' % i,
            'gidNumber': '%s' % i,
            'homeDirectory': '/home/%s' % rdn,
            'userPassword': rdn,
        })

    # Add the marker
    d.replace('description', TEST_MARKER)
    # Done!

# Single uid
# 1000 uid
# 4000 uid
# 5000 uid
# 10,000 uid

# & of single uid
# & of two 1000 uid sets
# & of two 4000 uid sets
# & of two 5000 uid sets
# & of two 10,000 uid sets

# | of single uid
# | of two 1000 uid sets
# | of two 4000 uid sets
# | of two 5000 uid sets
# | of two 10,000 uid sets

# & of user and group

# | of user and group

def _do_search_performance(inst, thread_count):
    # Configure thread count
    # Restart
    print("Configuring with %s threads ..." % thread_count)
    time.sleep(1)
    inst.config.set('nsslapd-threadnumber', str(thread_count))
    inst.restart()
    ld = Ldclt(inst)
    out = ld.search_loadtest(DEFAULT_SUFFIX, "(uid=user_XXXXXXX)", min=1, max=USER_MAX)
    return out

# Need a check here
def test_user_search_performance():
    inst = DirSrv(verbose=True)
    inst.remote_simple_allocate(
        f"ldaps://{TARGET_HOST}",
        password="password"
    )
    # Need a better way to set this.
    inst.host = TARGET_HOST
    inst.port = TARGET_PORT
    inst.open(reqcert=ldap.OPT_X_TLS_NEVER)
    assert_data_present(inst)
    r1 = _do_search_performance(inst, 1)
    # r2 = _do_search_performance(inst, 4)
    # r3 = _do_search_performance(inst, 6)
    # r4 = _do_search_performance(inst, 8)
    # r5 = _do_search_performance(inst, 12)
    r6 = _do_search_performance(inst, 16)
    # print("category,t1,t4,t6,t8,t12,t16")
    # print("search,%s,%s,%s,%s,%s,%s" % (r1, r2, r3, r4, r5, r6))

def test_group_search_performance():
    pass

## TODO
# Tweak cache levels
# turbo mode
# ldclt threads = 2x server?
# add perf logs to each test
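
# Note: the CSV layout described in the module comment above is never actually
# emitted (the print calls in test_user_search_performance are commented out).
# A minimal, hedged sketch of producing it from the returned rates, assuming
# all thread counts were run and collected into a dict:
#
#     # Hedged sketch: emit the CSV described in the module comment from the
#     # per-thread-count results returned by _do_search_performance.
#     def print_csv(results):
#         # results is a dict like {1: r1, 4: r2, 16: r6, ...}
#         header = 'category,' + ','.join('t%d' % t for t in sorted(results))
#         row = 'search,' + ','.join(str(results[t]) for t in sorted(results))
#         print(header)
#         print(row)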





ldap/servers/slapd/log.c (+16, -0)

@@ -31,6 +31,10 @@
#include <pwd.h> /* getpwnam */
#define _PSEP '/'

#ifdef SYSTEMTAP
#include <sys/sdt.h>
#endif

/**************************************************************************
* GLOBALS, defines, and ...
*************************************************************************/
@@ -2556,6 +2560,10 @@ vslapd_log_access(char *fmt, va_list ap)
int32_t rc = LDAP_SUCCESS;
time_t tnl;

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, vslapd_log_access__entry);
#endif

/* We do this sooner, because that way we can use the message in other calls */
if ((vlen = vsnprintf(vbuf, SLAPI_LOG_BUFSIZ, fmt, ap)) == -1) {
log__error_emergency("vslapd_log_access, Unable to format message", 1, 0);
@@ -2602,8 +2610,16 @@ vslapd_log_access(char *fmt, va_list ap)
rc = -1;
}

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, vslapd_log_access__prepared);
#endif

log_append_buffer2(tnl, loginfo.log_access_buffer, buffer, blen, vbuf, vlen);

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, vslapd_log_access__buffer);
#endif

return (rc);
}



ldap/servers/slapd/opshared.c (+20, -0)

@@ -16,6 +16,10 @@
#include "log.h"
#include "slap.h"

#ifdef SYSTEMTAP
#include <sys/sdt.h>
#endif

#define PAGEDRESULTS_PAGE_END 1
#define PAGEDRESULTS_SEARCH_END 2

@@ -271,6 +275,10 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
be_list[0] = NULL;
referral_list[0] = NULL;

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, op_shared_search__entry);
#endif

/* get search parameters */
slapi_pblock_get(pb, SLAPI_ORIGINAL_TARGET_DN, &base);
slapi_pblock_get(pb, SLAPI_SEARCH_TARGET_SDN, &sdn);
@@ -636,6 +644,10 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
}
}

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, op_shared_search__prepared);
#endif

nentries = 0;
rc = -1; /* zero backends would mean failure */
while (be) {
@@ -940,6 +952,10 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
be = next_be; /* this be won't be used for PAGED_RESULTS */
}

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, op_shared_search__backends);
#endif

/* if referrals were sent back by the mapping tree
* add them to the list of referral in the pblock instead
* of searching the backend
@@ -1036,6 +1052,10 @@ free_and_return_nolock:

slapi_ch_free_string(&proxydn);
slapi_ch_free_string(&proxystr);

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, op_shared_search__return);
#endif
}

/* Returns 1 if this processing on this entry is finished


ldap/servers/slapd/search.c (+16, -0)

@@ -30,6 +30,10 @@
#include "pratom.h"
#include "snmp_collator.h"

#ifdef SYSTEMTAP
#include <sys/sdt.h>
#endif

static void log_search_access(Slapi_PBlock *pb, const char *base, int scope, const char *filter, const char *msg);

void
@@ -57,6 +61,9 @@ do_search(Slapi_PBlock *pb)
Connection *pb_conn = NULL;

slapi_log_err(SLAPI_LOG_TRACE, "do_search", "=>\n");
#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, do_search__entry);
#endif

slapi_pblock_get(pb, SLAPI_OPERATION, &operation);
ber = operation->o_ber;
@@ -373,6 +380,11 @@ do_search(Slapi_PBlock *pb)
slapi_pblock_set(pb, SLAPI_SEARCH_SIZELIMIT, &sizelimit);
slapi_pblock_set(pb, SLAPI_SEARCH_TIMELIMIT, &timelimit);


/*
* op_shared_search defines STAP_PROBE for __entry and __return,
* so these can be used to delineate the start and end here.
*/
op_shared_search(pb, psearch ? 0 : 1 /* send result */);

slapi_pblock_get(pb, SLAPI_PLUGIN_OPRETURN, &rc);
@@ -402,6 +414,10 @@ free_and_return:
}
slapi_ch_free_string(&rawbase);
}

#ifdef SYSTEMTAP
STAP_PROBE(ns-slapd, do_search__return);
#endif
}

static void


profiling/stap/probe_do_search_detail.stp (+64, -0)

@@ -0,0 +1,64 @@
#!/bin/env stap

global do_search_full
global do_search_prepared
global do_search_complete
global do_search_finalise

global entry_times%
global prepared_times%
global search_times%
global finalise_times%

// Marks hooked by this script:
// do_search__entry
// op_shared_search__entry
// op_shared_search__return
// do_search__return

probe process(@1).mark("do_search__entry") {
entry_times[tid()] = gettimeofday_us()
prepared_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("op_shared_search__entry") {
do_search_prepared <<< gettimeofday_us() - prepared_times[tid()]
delete prepared_times[tid()]

search_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("op_shared_search__return") {
do_search_complete <<< gettimeofday_us() - search_times[tid()]
delete search_times[tid()]

finalise_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("do_search__return") {
do_search_finalise <<< gettimeofday_us() - finalise_times[tid()]
delete finalise_times[tid()]

do_search_full <<< gettimeofday_us() - entry_times[tid()]
delete entry_times[tid()]
}

function report() {
printf("Distribution of do_search_full latencies (in microseconds) for %d samples\n", @count(do_search_full))
printf("max/avg/min: %d/%d/%d\n", @max(do_search_full), @avg(do_search_full), @min(do_search_full))
print(@hist_log(do_search_full))

printf("Distribution of do_search_prepared latencies (in microseconds) for %d samples\n", @count(do_search_prepared))
printf("max/avg/min: %d/%d/%d\n", @max(do_search_prepared), @avg(do_search_prepared), @min(do_search_prepared))
print(@hist_log(do_search_prepared))

printf("Distribution of do_search_complete latencies (in microseconds) for %d samples\n", @count(do_search_complete))
printf("max/avg/min: %d/%d/%d\n", @max(do_search_complete), @avg(do_search_complete), @min(do_search_complete))
print(@hist_log(do_search_complete))

printf("Distribution of do_search_finalise latencies (in microseconds) for %d samples\n", @count(do_search_finalise))
printf("max/avg/min: %d/%d/%d\n", @max(do_search_finalise), @avg(do_search_finalise), @min(do_search_finalise))
print(@hist_log(do_search_finalise))
}

probe end { report() }


profiling/stap/probe_log_access_detail.stp (+51, -0)

@@ -0,0 +1,51 @@
#!/bin/env stap

global log_access_full
global log_access_prepared
global log_access_complete

global entry_times%
global prepared_times%
global finalise_times%

// vslapd_log_access__entry
// vslapd_log_access__prepared
// vslapd_log_access__buffer


probe process(@1).mark("vslapd_log_access__entry") {
entry_times[tid()] = gettimeofday_us()
prepared_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("vslapd_log_access__prepared") {
log_access_prepared <<< gettimeofday_us() - prepared_times[tid()]
delete prepared_times[tid()]

finalise_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("vslapd_log_access__buffer") {
log_access_complete <<< gettimeofday_us() - finalise_times[tid()]
delete finalise_times[tid()]

log_access_full <<< gettimeofday_us() - entry_times[tid()]
delete entry_times[tid()]
}

function report() {
printf("Distribution of log_access_full latencies (in microseconds) for %d samples\n", @count(log_access_full))
printf("max/avg/min: %d/%d/%d\n", @max(log_access_full), @avg(log_access_full), @min(log_access_full))
print(@hist_log(log_access_full))

printf("Distribution of log_access_prepared latencies (in microseconds) for %d samples\n", @count(log_access_prepared))
printf("max/avg/min: %d/%d/%d\n", @max(log_access_prepared), @avg(log_access_prepared), @min(log_access_prepared))
print(@hist_log(log_access_prepared))

printf("Distribution of log_access_complete latencies (in microseconds) for %d samples\n", @count(log_access_complete))
printf("max/avg/min: %d/%d/%d\n", @max(log_access_complete), @avg(log_access_complete), @min(log_access_complete))
print(@hist_log(log_access_complete))
}

probe end { report() }


profiling/stap/probe_op_shared_search.stp (+64, -0)

@@ -0,0 +1,64 @@
#!/bin/env stap

global op_shared_search_full
global op_shared_search_prepared
global op_shared_search_complete
global op_shared_search_finalise

global entry_times%
global prepared_times%
global search_times%
global finalise_times%

// op_shared_search__entry
// op_shared_search__prepared
// op_shared_search__backends
// op_shared_search__return

probe process(@1).mark("op_shared_search__entry") {
entry_times[tid()] = gettimeofday_us()
prepared_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("op_shared_search__prepared") {
op_shared_search_prepared <<< gettimeofday_us() - prepared_times[tid()]
delete prepared_times[tid()]

search_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("op_shared_search__backends") {
op_shared_search_complete <<< gettimeofday_us() - search_times[tid()]
delete search_times[tid()]
finalise_times[tid()] = gettimeofday_us()
}

probe process(@1).mark("op_shared_search__return") {
op_shared_search_finalise <<< gettimeofday_us() - finalise_times[tid()]
delete finalise_times[tid()]

op_shared_search_full <<< gettimeofday_us() - entry_times[tid()]
delete entry_times[tid()]
}

function report() {
printf("Distribution of op_shared_search_full latencies (in microseconds) for %d samples\n", @count(op_shared_search_full))
printf("max/avg/min: %d/%d/%d\n", @max(op_shared_search_full), @avg(op_shared_search_full), @min(op_shared_search_full))
print(@hist_log(op_shared_search_full))

printf("Distribution of op_shared_search_prepared latencies (in microseconds) for %d samples\n", @count(op_shared_search_prepared))
printf("max/avg/min: %d/%d/%d\n", @max(op_shared_search_prepared), @avg(op_shared_search_prepared), @min(op_shared_search_prepared))
print(@hist_log(op_shared_search_prepared))

printf("Distribution of op_shared_search_complete latencies (in microseconds) for %d samples\n", @count(op_shared_search_complete))
printf("max/avg/min: %d/%d/%d\n", @max(op_shared_search_complete), @avg(op_shared_search_complete), @min(op_shared_search_complete))
print(@hist_log(op_shared_search_complete))

printf("Distribution of op_shared_search_finalise latencies (in microseconds) for %d samples\n", @count(op_shared_search_finalise))
printf("max/avg/min: %d/%d/%d\n", @max(op_shared_search_finalise), @avg(op_shared_search_finalise), @min(op_shared_search_finalise))
print(@hist_log(op_shared_search_finalise))
}

probe end { report() }


src/lib389/lib389/__init__.py (+10, -1)

@@ -1021,7 +1021,7 @@ class DirSrv(SimpleLDAPObject, object):
self.log.debug("Using external ca certificate %s", certdir)
self.set_option(ldap.OPT_X_TLS_CACERTDIR, ensure_str(certdir))

-if certdir or starttls:
+if certdir or starttls or uri.startswith('ldaps://'):
try:
# Note this sets LDAP.OPT not SELF. Because once self has opened
# it can NOT change opts on reused (ie restart)
@@ -1119,6 +1119,10 @@ class DirSrv(SimpleLDAPObject, object):

@raise ValueError
'''
+if not self.isLocal:
+    self.log.error("This is a remote instance!")
+    input('Press Enter when the instance has started ...')
+    return

if self.status() is True:
return
@@ -1186,6 +1190,11 @@ class DirSrv(SimpleLDAPObject, object):

@raise ValueError
'''
+if not self.isLocal:
+    self.log.error("This is a remote instance!")
+    input('Press Enter when the instance has stopped ...')
+    return

if self.status() is False:
return



src/lib389/lib389/ldclt.py (+19, -7)

@@ -7,7 +7,7 @@
# --- END COPYRIGHT BLOCK ---

import subprocess
-from lib389.utils import format_cmd_list
+from lib389.utils import format_cmd_list, ensure_str

"""
This class will allow general usage of ldclt.
@@ -99,16 +99,28 @@ loginShell: /bin/false
self.log.debug("ldclt loadtest ...")
self.log.debug(format_cmd_list(cmd))
try:
-result = subprocess.check_output(cmd)
+result = ensure_str(subprocess.check_output(cmd))
# If verbose, capture / log the output.
except subprocess.CalledProcessError as e:
print(format_cmd_list(cmd))
print(result)
raise(e)
self.log.debug(result)
-return result
+# The output looks like:
+# ldclt[44308]: Average rate: 4017.60/thr (4017.60/sec), total: 40176
+# ldclt[44308]: Number of samples achieved. Bye-bye...
+# ldclt[44308]: All threads are dead - exit.
+# ldclt[44308]: Global average rate: 40604.00/thr (4060.40/sec), total: 406040
+# ldclt[44308]: Global number times "no activity" reports: never
+# ldclt[44308]: Global no error occurs during this session.
+# So we want the "global avg rate" per second.
+section = None
+for line in result.splitlines():
+    if 'Global average rate' in line:
+        section = line.split('(')[1].split(')')[0].split('/')[0]
+return section

-def bind_loadtest(self, subtree, min=1000, max=9999, rounds=3):
+def bind_loadtest(self, subtree, min=1000, max=9999, rounds=10):
# The bind users will be uid=userXXXX
digits = len('%s' % max)
cmd = [
@@ -128,9 +140,9 @@ loginShell: /bin/false
'-e',
'bindonly',
]
-self._run_ldclt(cmd)
+return self._run_ldclt(cmd)

-def search_loadtest(self, subtree, fpattern, min=1000, max=9999, rounds=3):
+def search_loadtest(self, subtree, fpattern, min=1000, max=9999, rounds=10):
digits = len('%s' % max)
cmd = [
'%s/ldclt' % self.ds.get_bin_dir(),
@@ -151,4 +163,4 @@ loginShell: /bin/false
'-e',
'randomattrlist=cn:uid:ou',
]
-self._run_ldclt(cmd)
+return self._run_ldclt(cmd)
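
The new parsing in _run_ldclt returns the per-second figure from the "Global
average rate" line as a string. A quick sketch of what that split chain yields,
using the sample line quoted in the comment above:

    # Hedged sketch: what the "Global average rate" parsing returns for the
    # sample ldclt output quoted in the commit's comment.
    line = 'ldclt[44308]: Global average rate: 40604.00/thr (4060.40/sec), total: 406040'
    section = line.split('(')[1].split(')')[0].split('/')[0]
    print(section)  # -> '4060.40' (a string: the global per-second rate)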

src/lib389/lib389/topologies.py (+0, -1)

@@ -320,7 +320,6 @@ def topology_st_gssapi(request):

return topology


@pytest.fixture(scope="module")
def topology_i2(request):
"""Create two instance DS deployment"""

