Commit e2f6588b authored by Neil Horman

Add powersave settings

Add an optional heuristic that allows cpus to stop servicing interrupts during
periods of low activity, to help power conservation. If more than power_thresh
cpus are more than a standard deviation below the average load, and no cpu is
overloaded by more than a standard deviation while having more than one irq on
it, then we stop balancing interrupts to one of the underloaded cpus. If at any
time a cpu goes more than a standard deviation over the average load, we
re-enable all the cpus for balancing.
parent 38b3bb82
......@@ -40,9 +40,9 @@ int debug_mode;
int numa_avail;
int need_cpu_rescan;
extern cpumask_t banned_cpus;
static int counter;
enum hp_e hint_policy = HINT_POLICY_SUBSET;
unsigned long power_thresh = ULONG_MAX;
unsigned long long cycle_count = 0;
void sleep_approx(int seconds)
{
......@@ -63,12 +63,14 @@ struct option lopts[] = {
{"oneshot", 0, NULL, 'o'},
{"debug", 0, NULL, 'd'},
{"hintpolicy", 1, NULL, 'h'},
{"powerthresh", 1, NULL, 'p'},
{0, 0, 0, 0}
};
/*
 * Print the command line synopsis to stdout.
 *
 * The synopsis is emitted exactly once, newline terminated, followed by the
 * continuation line describing the --powerthresh option.
 */
static void usage(void)
{
	printf("irqbalance [--oneshot | -o] [--debug | -d] [--hintpolicy= | -h [exact|subset|ignore]]\n");
	printf(" [--powerthresh= | -p <off> | <n>]\n");
}
static void parse_command_line(int argc, char **argv)
......@@ -77,7 +79,7 @@ static void parse_command_line(int argc, char **argv)
int longind;
while ((opt = getopt_long(argc, argv,
"odh:",
"odh:p:",
lopts, &longind)) != -1) {
switch(opt) {
......@@ -99,6 +101,17 @@ static void parse_command_line(int argc, char **argv)
exit(1);
}
break;
case 'p':
	/*
	 * --powerthresh: "off" (or a non-empty prefix of it, as before)
	 * disables the powersave heuristic by storing ULONG_MAX; anything
	 * else must be a plain unsigned decimal number.
	 *
	 * An empty argument used to compare equal to "off" because the
	 * compared length was zero, so it is rejected explicitly here.
	 */
	if (optarg[0] != '\0' && !strncmp(optarg, "off", strlen(optarg)))
		power_thresh = ULONG_MAX;
	else {
		char *endp;

		/* power_thresh is an unsigned long, so parse it as one */
		power_thresh = strtoul(optarg, &endp, 10);
		/*
		 * Reject input with no digits, trailing garbage, a sign
		 * (strtoul would silently wrap it), or a value that collides
		 * with the ULONG_MAX "off"/overflow sentinel.
		 */
		if (endp == optarg || *endp != '\0' ||
		    strchr(optarg, '-') || power_thresh == ULONG_MAX) {
			usage();
			exit(1);
		}
	}
	break;
case 'o':
one_shot_mode=1;
break;
......@@ -153,7 +166,6 @@ static void force_rebalance_irq(struct irq_info *info, void *data __attribute__(
int main(int argc, char** argv)
{
int compute_migration_status=0;
#ifdef HAVE_GETOPT_LONG
parse_command_line(argc, argv);
......@@ -214,7 +226,6 @@ int main(int argc, char** argv)
printf("\n\n\n-----------------------------------------------------------------------------\n");
check_power_mode();
parse_proc_interrupts();
parse_proc_stat();
......@@ -231,14 +242,11 @@ int main(int argc, char** argv)
free_object_tree();
build_object_tree();
for_each_irq(NULL, force_rebalance_irq, NULL);
compute_migration_status=0;
cycle_count=0;
}
if (compute_migration_status)
if (cycle_count)
update_migration_status();
else
compute_migration_status=1;
calculate_placement();
activate_mappings();
......@@ -248,7 +256,7 @@ int main(int argc, char** argv)
if (one_shot_mode)
break;
clear_work_stats();
counter++;
cycle_count++;
}
free_object_tree();
......
......@@ -60,6 +60,8 @@ extern int one_shot_mode;
extern int power_mode;
extern int need_cpu_rescan;
extern enum hp_e hint_policy;
extern unsigned long long cycle_count;
extern unsigned long power_thresh;
/*
* Numa node access routines
......
......@@ -42,6 +42,10 @@ struct load_balance_info {
int load_sources;
unsigned long long int deviations;
long double std_deviation;
unsigned int num_within;
unsigned int num_over;
unsigned int num_under;
struct topo_obj *powersave;
};
static void gather_load_stats(struct topo_obj *obj, void *data)
......@@ -102,13 +106,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data)
/*
* Don't rebalance irqs on objects whos load is below the average
*/
if (obj->load <= info->avg_load)
if (obj->load <= info->avg_load) {
if ((obj->load + info->std_deviation) <= info->avg_load) {
info->num_under++;
info->powersave = obj;
} else
info->num_within++;
return;
}
deviation = obj->load - info->avg_load;
if ((deviation > info->std_deviation) &&
(g_list_length(obj->interrupts) > 1)) {
info->num_over++;
/*
* We have a cpu that is overloaded and
* has irqs that can be moved to fix that
......@@ -124,10 +136,21 @@ static void migrate_overloaded_irqs(struct topo_obj *obj, void *data)
* difference reaches zero
*/
for_each_irq(obj->interrupts, move_candidate_irqs, &deviation);
}
} else
info->num_within++;
}
/*
 * Per-irq callback (second argument is unused).
 *
 * Hands @info to migrate_irq() together with the interrupt list of the object
 * the irq is currently assigned to and the global rebalance_irq_list.
 * NOTE(review): presumably this moves the irq from the former list onto the
 * latter so that it gets placed again -- confirm against migrate_irq().
 */
static void force_irq_migration(struct irq_info *info, void *data __attribute__((unused)))
{
migrate_irq(&info->assigned_obj->interrupts, &rebalance_irq_list, info);
}
/*
 * Per-object callback (second argument is unused): clears the powersave_mode
 * flag of @obj.
 */
static void clear_powersave_mode(struct topo_obj *obj, void *data __attribute__((unused)))
{
obj->powersave_mode = 0;
}
#define find_overloaded_objs(name, info) do {\
int ___load_sources;\
memset(&(info), 0, sizeof(struct load_balance_info));\
......@@ -145,6 +168,13 @@ void update_migration_status(void)
struct load_balance_info info;
find_overloaded_objs(cpus, info);
/*
 * Powersave heuristic, only evaluated once more than five cycles have been
 * counted in cycle_count.
 *
 * When no cpu was counted as overloaded and at least power_thresh cpus were
 * counted as under-loaded, the recorded under-loaded cpu is flagged for
 * powersave and all of its irqs are forced to migrate.  As soon as any cpu
 * is counted as overloaded, the flag is cleared on every cpu again.
 *
 * info.powersave stays NULL when no cpu was counted as under-loaded, which
 * still satisfies "num_under >= power_thresh" for a threshold of 0, so it
 * has to be checked before it is dereferenced.
 */
if (cycle_count > 5) {
	if (!info.num_over && info.powersave &&
	    (info.num_under >= power_thresh)) {
		info.powersave->powersave_mode = 1;
		for_each_irq(info.powersave->interrupts, force_irq_migration, NULL);
	} else if (info.num_over)
		for_each_object(cpus, clear_powersave_mode, NULL);
}
find_overloaded_objs(cache_domains, info);
find_overloaded_objs(packages, info);
find_overloaded_objs(numa_nodes, info);
......
......@@ -58,6 +58,9 @@ static void find_best_object(struct topo_obj *d, void *data)
}
}
if (d->powersave_mode)
return;
newload = d->load;
if (newload < best->best_cost) {
best->best = d;
......
......@@ -28,54 +28,7 @@
#include "irqbalance.h"
extern int power_mode;
static uint64_t previous;
static unsigned int hysteresis;
/*
 * Sample the combined irq + softirq counters from the first line of
 * /proc/stat and update the global power_mode flag.
 *
 * If the counters advanced by less than POWER_MODE_SOFTIRQ_THRESHOLD since
 * the previous call, a hysteresis counter is bumped; once it exceeds
 * POWER_MODE_HYSTERESIS, power_mode is set to 1.  Any sample at or above the
 * threshold resets the hysteresis counter and sets power_mode to 0.
 *
 * If /proc/stat cannot be opened or its first line cannot be read, the
 * function returns without touching any state.
 */
void check_power_mode(void)
{
	FILE *file;
	char *line = NULL;
	size_t size = 0;
	char *c;
	uint64_t irq, softirq, delta;
	int field;

	file = fopen("/proc/stat", "r");
	if (!file)
		return;
	/*
	 * getline() signals failure with -1, not 0, and may still have
	 * allocated a buffer.  Parsing starts at offset 4, so anything that
	 * is not longer than 4 characters is treated as a failed read too.
	 */
	if (getline(&line, &size, file) <= 4) {
		fclose(file);
		free(line);
		return;
	}
	fclose(file);

	/* skip the 4 character line prefix in front of the counters */
	c = &line[4];
	/* user, nice, system, idle and iowait are not needed */
	for (field = 0; field < 5; field++)
		(void)strtoull(c, &c, 10);
	irq = strtoull(c, &c, 10); /* irq */
	softirq = strtoull(c, &c, 10); /* softirq */
	irq += softirq;

	delta = irq - previous;
	printf("IRQ delta is %lu \n", (unsigned long)delta);
	if (delta < POWER_MODE_SOFTIRQ_THRESHOLD) {
		hysteresis++;
		/* only switch after several consecutive quiet samples */
		if (hysteresis > POWER_MODE_HYSTERESIS) {
			if (debug_mode && !power_mode)
				printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)delta);
			power_mode = 1;
		}
	} else {
		if (debug_mode && power_mode)
			printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)delta);
		power_mode = 0;
		hysteresis = 0;
	}
	previous = irq;
	free(line);
}
......@@ -37,6 +37,7 @@ struct topo_obj {
uint64_t load;
enum obj_type_e obj_type;
int number;
int powersave_mode;
cpumask_t mask;
GList *interrupts;
struct topo_obj *parent;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment