Commit 57159ea2 authored by Neil Horman

Migrate to use of irq_info and remove struct interrupt

Migrate core workload calculation code to use new irq_info struct and for_each_*
helper functions
parent 90cf41fb
......@@ -32,30 +32,28 @@
#include "irqbalance.h"
/*
 * activate_mapping - push an IRQ's newly computed CPU mask to the kernel.
 *
 * NOTE(review): this span contains two bodies back to back. The first walks
 * the global "interrupts" list of struct interrupt; the second handles a
 * single struct irq_info and has the (info, data) callback signature that
 * for_each_irq() expects.
 *
 * Both bodies do nothing for an IRQ whose balance level is BALANCE_NONE or
 * whose mask equals old_mask. Otherwise they format the mask with
 * cpumask_scnprintf(), write it to /proc/irq/<n>/smp_affinity and copy mask
 * into old_mask. If fopen() fails the IRQ is skipped and old_mask is left
 * untouched.
 */
void activate_mapping(void)
static void activate_mapping(struct irq_info *info, void *data __attribute__((unused)))
{
struct interrupt *irq;
GList *iter;
iter = g_list_first(interrupts);
while (iter) {
irq = iter->data;
iter = g_list_next(iter);
/* don't set the level if it's a NONE irq, or if there is
* no change */
if (irq->balance_level != BALANCE_NONE &&
!cpus_equal(irq->mask, irq->old_mask)) {
char buf[PATH_MAX];
FILE *file;
sprintf(buf, "/proc/irq/%i/smp_affinity", irq->number);
file = fopen(buf, "w");
if (!file)
continue;
/* buf is reused: it held the path, now it holds the mask text */
cpumask_scnprintf(buf, PATH_MAX, irq->mask);
fprintf(file,"%s", buf);
fclose(file);
irq->old_mask = irq->mask;
}
}
/* single-IRQ body operating on struct irq_info starts here */
char buf[PATH_MAX];
FILE *file;
if (info->level == BALANCE_NONE)
return;
if (cpus_equal(info->mask, info->old_mask))
return;
sprintf(buf, "/proc/irq/%i/smp_affinity", info->irq);
file = fopen(buf, "w");
if (!file)
return;
/* buf is reused: it held the path, now it holds the mask text */
cpumask_scnprintf(buf, PATH_MAX, info->mask);
fprintf(file, "%s", buf);
fclose(file);
/* remember what was written so an unchanged mask is not rewritten */
info->old_mask = info->mask;
}
/*
 * Run activate_mapping() once for every IRQ.
 *
 * The list argument is NULL, which makes for_each_irq() walk the global
 * interrupt database; the callback's data argument is unused.
 */
void activate_mappings(void)
{
	for_each_irq(NULL, activate_mapping, NULL);
}
......@@ -56,38 +56,6 @@ static short class_codes[MAX_CLASS] = {
static GList *interrupts_db;
#define SYSDEV_DIR "/sys/bus/pci/devices"
/* Storage for one IRQ property value: either a plain integer or a CPU mask. */
union property {
int int_val;
cpumask_t mask_val;
};
/* Discriminator recording which kind of value an IRQ property holds. */
enum irq_type {
INT_TYPE = 0,
CPUMASK_TYPE,
};
/*
 * One tagged property value: itype says which member of iproperty is
 * meaningful (the accessors in this file assert on itype before use).
 */
struct irq_property {
enum irq_type itype;
union property iproperty;
};
/* Shorthand for reaching the union members through a struct irq_property. */
#define iint_val iproperty.int_val
#define imask_val iproperty.mask_val
/*
 * Database record for one IRQ: the IRQ number plus a table of tagged
 * properties indexed by the IRQ_* property constants (IRQ_CLASS, IRQ_TYPE,
 * IRQ_NUMA, ...), sized by IRQ_MAX_PROPERTY.
 */
struct irq_info {
int irq;
struct irq_property property[IRQ_MAX_PROPERTY];
};
/*
 * Tag each slot of a fresh irq_info's property table with the kind of value
 * it will carry. Only the local-CPU mask slot is a cpumask; every other slot
 * is an integer. The property values themselves are left untouched.
 */
static void init_new_irq(struct irq_info *new)
{
	struct irq_property *slot = new->property;

	slot[IRQ_CLASS].itype = INT_TYPE;
	slot[IRQ_TYPE].itype = INT_TYPE;
	slot[IRQ_NUMA].itype = INT_TYPE;
	slot[IRQ_LEVEL].itype = INT_TYPE;
	slot[IRQ_LCPU_MASK].itype = CPUMASK_TYPE;
	slot[IRQ_INT_COUNT].itype = INT_TYPE;
	slot[IRQ_LAST_INT_COUNT].itype = INT_TYPE;
}
static gint compare_ints(gconstpointer a, gconstpointer b)
{
......@@ -129,13 +97,12 @@ static struct irq_info *add_one_irq_to_db(const char *devpath, int irq)
return NULL;
}
new = malloc(sizeof(struct irq_info));
new = calloc(sizeof(struct irq_info), 1);
if (!new)
return NULL;
init_new_irq(new);
new->irq = irq;
new->property[IRQ_CLASS].iint_val = IRQ_OTHER;
new->class = IRQ_OTHER;
interrupts_db = g_list_append(interrupts_db, new);
......@@ -162,8 +129,8 @@ static struct irq_info *add_one_irq_to_db(const char *devpath, int irq)
if (class >= MAX_CLASS)
goto get_numa_node;
new->property[IRQ_CLASS].iint_val = class_codes[class];
new->property[IRQ_LEVEL].iint_val = map_class_to_level[class_codes[class]];
new->class = class_codes[class];
new->level = map_class_to_level[class_codes[class]];
get_numa_node:
numa_node = -1;
......@@ -176,23 +143,39 @@ get_numa_node:
fclose(fd);
assign_node:
new->property[IRQ_NUMA].iint_val = numa_node;
new->numa_node = get_numa_node(numa_node);
sprintf(path, "%s/local_cpus", devpath);
fd = fopen(path, "r");
if (!fd) {
cpus_setall(new->property[IRQ_LCPU_MASK].imask_val);
goto out;
cpus_setall(new->cpumask);
goto assign_affinity_hint;
}
lcpu_mask = NULL;
rc = fscanf(fd, "%as", &lcpu_mask);
fclose(fd);
if (!lcpu_mask) {
cpus_setall(new->property[IRQ_LCPU_MASK].imask_val);
cpus_setall(new->cpumask);
} else {
cpumask_parse_user(lcpu_mask, strlen(lcpu_mask),
new->property[IRQ_LCPU_MASK].imask_val);
new->cpumask);
}
free(lcpu_mask);
assign_affinity_hint:
cpus_clear(new->affinity_hint);
sprintf(path, "/proc/irq/%d/affinity_hint", irq);
fd = fopen(path, "r");
if (!fd)
goto out;
lcpu_mask = NULL;
rc = fscanf(fd, "%as", &lcpu_mask);
fclose(fd);
if (!lcpu_mask)
goto out;
cpumask_parse_user(lcpu_mask, strlen(lcpu_mask),
new->affinity_hint);
free(lcpu_mask);
out:
if (debug_mode)
printf("Adding IRQ %d to database\n", irq);
......@@ -230,7 +213,7 @@ static void build_one_dev_entry(const char *dirname)
new = add_one_irq_to_db(path, irqnum);
if (!new)
continue;
new->property[IRQ_TYPE].iint_val = IRQ_TYPE_MSIX;
new->type = IRQ_TYPE_MSIX;
}
} while (entry != NULL);
closedir(msidir);
......@@ -252,8 +235,9 @@ static void build_one_dev_entry(const char *dirname)
new = add_one_irq_to_db(path, irqnum);
if (!new)
goto done;
new->property[IRQ_TYPE].iint_val = IRQ_TYPE_LEGACY;
new->type = IRQ_TYPE_LEGACY;
}
done:
fclose(fd);
return;
......@@ -286,117 +270,82 @@ void rebuild_irq_db(void)
closedir(devdir);
}
/*
 * add_misc_irq - append a default database entry for an IRQ.
 *
 * NOTE(review): two variants are interleaved in this span. The static one
 * returns the GList link of the new entry (found again via
 * g_list_find_custom); the exported one returns the struct irq_info itself.
 *
 * The entry is typed IRQ_TYPE_LEGACY with class IRQ_OTHER and is appended to
 * interrupts_db. Returns NULL when the allocation fails.
 *
 * NOTE(review): the property-table variant stores -1 as the NUMA node, while
 * the field-based variant stores get_numa_node(0) - confirm that node 0 is
 * the intended default.
 */
static GList *add_misc_irq(int irq)
struct irq_info *add_misc_irq(int irq)
{
struct irq_info *new, find;
struct irq_info *new;
new = malloc(sizeof(struct irq_info));
/* calloc zero-fills the record; the two arguments are in (size, count) order */
new = calloc(sizeof(struct irq_info), 1);
if (!new)
return NULL;
init_new_irq(new);
new->irq = irq;
new->property[IRQ_TYPE].iint_val = IRQ_TYPE_LEGACY;
new->property[IRQ_CLASS].iint_val = IRQ_OTHER;
new->property[IRQ_NUMA].iint_val = -1;
new->type = IRQ_TYPE_LEGACY;
new->class = IRQ_OTHER;
new->numa_node = get_numa_node(0);
interrupts_db = g_list_append(interrupts_db, new);
find.irq = irq;
return g_list_find_custom(interrupts_db, &find, compare_ints);
return new;
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * find_irq_integer_prop(irq, prop): looks irq up in interrupts_db, adds a
 * default entry through add_misc_irq() when it is missing, asserts that the
 * requested property is an INT_TYPE and returns its integer value.
 *
 * for_each_irq(list, cb, data): calls cb(info, data) for every element of
 * list, or of the global interrupts_db when list is NULL. The successor link
 * is read before cb runs - presumably so the callback may unlink the current
 * entry without breaking the walk.
 */
int find_irq_integer_prop(int irq, enum irq_prop prop)
void for_each_irq(GList *list, void (*cb)(struct irq_info *info, void *data), void *data)
{
GList *entry;
struct irq_info find, *result;
find.irq = irq;
GList *entry = g_list_first(list ? list : interrupts_db);
GList *next;
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
if (!entry) {
if (debug_mode)
printf("No entry for irq %d in the irq database, adding default entry\n", irq);
entry = add_misc_irq(irq);
while (entry) {
/* fetch the successor first; cb only ever sees the element payload */
next = g_list_next(entry);
cb(entry->data, data);
entry = next;
}
result = entry->data;
assert(result->property[prop].itype == INT_TYPE);
return result->property[prop].iint_val;
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * set_irq_integer_prop(irq, prop, val): looks irq up in interrupts_db, adds
 * a default entry through add_misc_irq() when it is missing, asserts that
 * the property is an INT_TYPE, stores val and returns 0.
 *
 * get_irq_info(irq): looks irq up in interrupts_db with compare_ints and
 * returns the matching struct irq_info, or NULL when there is no entry. It
 * does not create a default entry on a miss.
 */
int set_irq_integer_prop(int irq, enum irq_prop prop, int val)
struct irq_info *get_irq_info(int irq)
{
GList *entry;
struct irq_info find, *result;
find.irq = irq;
/* stack key: only the irq field is set before the search */
struct irq_info find;
find.irq = irq;
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
if (!entry) {
if (debug_mode)
printf("No entry for irq %d in the irq database, adding default entry\n", irq);
entry = add_misc_irq(irq);
}
result = entry->data;
assert(result->property[prop].itype == INT_TYPE);
result->property[prop].iint_val = val;
return 0;
return entry ? entry->data : NULL;
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * find_irq_cpumask_prop(irq, prop): looks irq up in interrupts_db, adds a
 * default entry when it is missing, asserts that the property is a
 * CPUMASK_TYPE and returns the mask by value.
 *
 * migrate_irq(from, to, info): appends an IRQ record to the list *to. When
 * from is non-NULL the record is first located in *from by IRQ number and
 * its link is deleted from that list; when from is NULL, info itself is
 * appended.
 *
 * NOTE(review): in migrate_irq the result of g_list_find_custom() is
 * dereferenced without a NULL check, so an IRQ that is not present in *from
 * would crash here - confirm every caller guarantees membership.
 */
cpumask_t find_irq_cpumask_prop(int irq, enum irq_prop prop)
void migrate_irq(GList **from, GList **to, struct irq_info *info)
{
GList *entry;
struct irq_info find, *result;
find.irq = irq;
/* NOTE(review): stray second ';' below is an empty declaration/statement */
struct irq_info find, *tmp;;
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
if (from != NULL) {
find.irq = info->irq;
entry = g_list_find_custom(*from, &find, compare_ints);
/* take the payload before the link is freed by g_list_delete_link */
tmp = entry->data;
*from = g_list_delete_link(*from, entry);
} else
tmp = info;
if (!entry) {
if (debug_mode)
printf("No entry for irq %d in the irq database, adding default entry\n", irq);
entry = add_misc_irq(irq);
}
result = entry->data;
assert(result->property[prop].itype == CPUMASK_TYPE);
return result->property[prop].imask_val;
*to = g_list_append(*to, tmp);
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * get_next_irq(irq): iterator over interrupts_db keyed by IRQ number. With
 * irq == -1 it returns the number of the first entry; otherwise it returns
 * the number of the entry following irq, or -1 when irq is not found or is
 * the last entry.
 *
 * sort_irqs(A, B): GLib comparison callback for struct irq_info. It returns
 * 1 when a has the smaller class and -1 when it has the larger one, then
 * applies the same rule to workload, so larger values sort first. Remaining
 * ties are broken by comparing the two pointers; the function never
 * returns 0.
 */
int get_next_irq(int irq)
static gint sort_irqs(gconstpointer A, gconstpointer B)
{
GList *entry;
struct irq_info *irqp, find;
struct irq_info *a, *b;
a = (struct irq_info*)A;
b = (struct irq_info*)B;
if (irq == -1) {
entry = g_list_first(interrupts_db);
irqp = entry->data;
return irqp->irq;
}
find.irq = irq;
entry = g_list_find_custom(interrupts_db, &find, compare_ints);
if (!entry)
if (a->class < b->class)
return 1;
if (a->class > b->class)
return -1;
entry = g_list_next(entry);
if (!entry)
if (a->workload < b->workload)
return 1;
if (a->workload > b->workload)
return -1;
irqp= entry->data;
return irqp->irq;
/* final tie-break on the record addresses */
if (a<b)
return 1;
return -1;
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * for_each_irq(cb), single-argument form: walks interrupts_db and passes
 * each entry's IRQ number to cb.
 *
 * sort_irq_list(): re-sorts the global interrupts_db in place using the
 * sort_irqs comparator and stores the new list head.
 */
void for_each_irq(void (*cb)(int irq))
void sort_irq_list(void)
{
struct irq_info *info;
GList *entry = g_list_first(interrupts_db);
while (entry) {
info = entry->data;
cb(info->irq);
entry = g_list_next(entry);
}
/* g_list_sort may return a different head, so it must be stored back */
interrupts_db = g_list_sort(interrupts_db, sort_irqs);
}
......@@ -283,49 +283,47 @@ static void do_one_cpu(char *path)
core_count++;
}
/*
 * NOTE(review): two functions are interleaved in this span.
 *
 * dump_irqs(spaces, list): prints one line per struct interrupt in list,
 * each preceded by `spaces` space characters.
 *
 * dump_irq(info, data): for_each_irq() callback that prints one line for a
 * single struct irq_info. The indentation width is carried in the data
 * pointer itself (callers pass e.g. (void *)18), not in memory it points to.
 *
 * Both print the IRQ number, its NUMA node number, its class name from
 * classes[] and its workload cast to unsigned int.
 */
static void dump_irqs(int spaces, GList *dump_interrupts)
static void dump_irq(struct irq_info *info, void *data)
{
struct interrupt *irq;
while (dump_interrupts) {
int i;
for (i=0; i<spaces; i++) printf(" ");
irq = dump_interrupts->data;
printf("Interrupt %i node_num is %d (%s/%u) \n", irq->number, irq->node_num, classes[irq->class], (unsigned int)irq->workload);
dump_interrupts = g_list_next(dump_interrupts);
}
/* recover the integer that the caller smuggled through the void pointer */
int spaces = (long int)data;
int i;
for (i=0; i<spaces; i++) printf(" ");
/* NOTE(review): irq_numa_node(info) is dereferenced unchecked - assumes numa_node is never NULL; confirm */
printf("Interrupt %i node_num is %d (%s/%u) \n", info->irq, irq_numa_node(info)->number, classes[info->class], (unsigned int)info->workload);
}
/*
 * NOTE(review): this span interleaves the header and local declarations of
 * a loop-based dump_tree() with the complete dump_cpu_core() callback.
 *
 * dump_cpu_core(c, data): for_each_cpu_core() callback that prints one CPU
 * core (number, NUMA node number, workload) and then every IRQ assigned to
 * it, using an indent of 18. The data argument is unused.
 */
void dump_tree(void)
static void dump_cpu_core(struct cpu_core *c, void *data __attribute__((unused)))
{
GList *p_iter, *c_iter, *cp_iter;
struct package *package;
struct cache_domain *cache_domain;
struct cpu_core *cpu;
printf(" CPU number %i numa_node is %d (workload %lu)\n", c->number, cpu_numa_node(c)->number , (unsigned long)c->workload);
/* guard matters: for_each_irq() falls back to the global IRQ list on NULL */
if (c->interrupts)
for_each_irq(c->interrupts, dump_irq, (void *)18);
}
/*
 * for_each_cache_domain() callback: print one cache domain, then the CPU
 * cores it contains, then the IRQs assigned to the domain itself.
 *
 * data is used as scratch space for the textual CPU mask; up to 4095 bytes
 * are written to it, so the caller must supply a buffer at least that large.
 */
static void dump_cache_domain(struct cache_domain *c, void *data)
{
	char *mask_text = data;
	unsigned long load = (unsigned long)c->workload;

	cpumask_scnprintf(mask_text, 4095, c->mask);
	printf(" Cache domain %i: numa_node is %d cpu mask is %s (workload %lu) \n", c->number, cache_domain_numa_node(c)->number, mask_text, load);

	/*
	 * Both iterators substitute a global list when handed NULL, so an
	 * empty member list has to be skipped rather than passed through.
	 */
	if (c->cpu_cores) {
		for_each_cpu_core(c->cpu_cores, dump_cpu_core, NULL);
	}
	if (c->interrupts) {
		for_each_irq(c->interrupts, dump_irq, (void *)10);
	}
}
/*
 * for_each_package() callback: print one package, then its cache domains,
 * then the IRQs assigned to the package itself.
 *
 * data is used as scratch space for the textual CPU mask (up to 4096 bytes
 * are written) and is handed on unchanged to the cache-domain callback.
 */
static void dump_package(struct package *p, void *data)
{
	char *mask_text = data;
	unsigned long load = (unsigned long)p->workload;

	cpumask_scnprintf(mask_text, 4096, p->mask);
	printf("Package %i: numa_node is %d cpu mask is %s (workload %lu)\n", p->number, package_numa_node(p)->number, mask_text, load);

	/*
	 * Both iterators substitute a global list when handed NULL, so an
	 * empty member list has to be skipped rather than passed through.
	 */
	if (p->cache_domains) {
		for_each_cache_domain(p->cache_domains, dump_cache_domain, mask_text);
	}
	if (p->interrupts) {
		for_each_irq(p->interrupts, dump_irq, (void *)2);
	}
}
/*
 * dump_tree - print the package / cache-domain / CPU-core hierarchy together
 * with the IRQs attached at each level.
 *
 * NOTE(review): two bodies are interleaved in this span. The nested while
 * loops walk packages, package->cache_domains and cache_domain->cpu_cores by
 * hand and call dump_irqs() with indents 2, 10 and 18. The final statement
 * does the same traversal through for_each_package() with the dump_package
 * callback. The iterator variables used by the loops are not declared inside
 * this span.
 *
 * buffer is the scratch area for textual CPU masks and is shared by every
 * level of the dump.
 */
void dump_tree(void)
{
char buffer[4096];
p_iter = g_list_first(packages);
while (p_iter) {
package = p_iter->data;
cpumask_scnprintf(buffer, 4096, package->mask);
printf("Package %i: numa_node is %d cpu mask is %s (workload %lu)\n", package->number, package_numa_node(package)->number, buffer, (unsigned long)package->workload);
c_iter = g_list_first(package->cache_domains);
while (c_iter) {
cache_domain = c_iter->data;
c_iter = g_list_next(c_iter);
cpumask_scnprintf(buffer, 4095, cache_domain->mask);
printf(" Cache domain %i: numa_node is %d cpu mask is %s (workload %lu) \n", cache_domain->number, cache_domain->node_num, buffer, (unsigned long)cache_domain->workload);
cp_iter = cache_domain->cpu_cores;
while (cp_iter) {
cpu = cp_iter->data;
cp_iter = g_list_next(cp_iter);
printf(" CPU number %i numa_node is %d (workload %lu)\n", cpu->number, cpu_numa_node(cpu)->number , (unsigned long)cpu->workload);
dump_irqs(18, cpu->interrupts);
}
dump_irqs(10, cache_domain->interrupts);
}
dump_irqs(2, package->interrupts);
p_iter = g_list_next(p_iter);
}
/* NULL selects the global package list; buffer is passed as the callback data */
for_each_package(NULL, dump_package, buffer);
}
/*
......@@ -444,3 +442,41 @@ void clear_cpu_tree(void)
core_count = 0;
}
/*
 * Invoke cb(package, data) for every element of list, starting from the
 * head of the list. A NULL list selects the global packages list.
 *
 * The successor link is read before the callback is invoked.
 */
void for_each_package(GList *list, void (*cb)(struct package *p, void *data), void *data)
{
	GList *cur;
	GList *following;

	for (cur = g_list_first(list ? list : packages); cur; cur = following) {
		following = g_list_next(cur);
		cb(cur->data, data);
	}
}
/*
 * Invoke cb(cache_domain, data) for every element of list, starting from
 * the head of the list. A NULL list selects the global cache_domains list.
 *
 * The successor link is read before the callback is invoked.
 */
void for_each_cache_domain(GList *list, void (*cb)(struct cache_domain *c, void *data), void *data)
{
	GList *cur;
	GList *following;

	for (cur = g_list_first(list ? list : cache_domains); cur; cur = following) {
		following = g_list_next(cur);
		cb(cur->data, data);
	}
}
/*
 * Invoke cb(cpu_core, data) for every element of list, starting from the
 * head of the list. A NULL list selects the global cpus list.
 *
 * The successor link is read before the callback is invoked.
 */
void for_each_cpu_core(GList *list, void (*cb)(struct cpu_core *c, void *data), void *data)
{
	GList *cur;
	GList *following;

	for (cur = g_list_first(list ? list : cpus); cur; cur = following) {
		following = g_list_next(cur);
		cb(cur->data, data);
	}
}
......@@ -124,7 +124,7 @@ static void free_object_tree()
/*
 * dump_object_tree - print every NUMA node via dump_numa_node_info().
 *
 * NOTE(review): the two calls below are the one-argument and the
 * three-argument form of for_each_numa_node(); only one of them can match
 * the prototype in effect. In the three-argument form both the list and the
 * callback data are NULL.
 */
static void dump_object_tree()
{
for_each_numa_node(dump_numa_node_info);
for_each_numa_node(NULL, dump_numa_node_info, NULL);
}
int main(int argc, char** argv)
......@@ -185,7 +185,6 @@ int main(int argc, char** argv)
sleep(SLEEP_INTERVAL/4);
reset_counts();
parse_proc_interrupts();
pci_numa_scan();
calculate_workload();
sort_irq_list();
if (debug_mode)
......@@ -216,13 +215,8 @@ int main(int argc, char** argv)
calculate_workload();
/* to cope with dynamic configurations we scan for new numa information
* once every 5 minutes
*/
pci_numa_scan();
calculate_placement();
activate_mapping();
activate_mappings();
if (debug_mode)
dump_tree();
......
......@@ -32,7 +32,6 @@ extern void parse_proc_interrupts(void);
extern void set_interrupt_count(int number, uint64_t count);
extern void set_msi_interrupt_numa(int number);
extern void add_interrupt_numa(int number, cpumask_t mask, int node_num, int type);
void calculate_workload(void);
void reset_counts(void);
......@@ -41,7 +40,7 @@ void sort_irq_list(void);
void calculate_placement(void);
void dump_tree(void);
void activate_mapping(void);
void activate_mappings(void);
void account_for_nic_stats(void);
void check_power_mode(void);
void clear_cpu_tree(void);
......@@ -53,19 +52,23 @@ void pci_numa_scan(void);
*/
extern void build_numa_node_list(void);
extern void free_numa_node_list(void);
extern void dump_numa_node_info(struct numa_node *node);
extern void for_each_numa_node(void (*cb)(struct numa_node *node));
extern void dump_numa_node_info(struct numa_node *node, void *data);
extern void for_each_numa_node(GList *list, void (*cb)(struct numa_node *node, void *data), void *data);
extern void add_package_to_node(struct package *p, int nodeid);
extern struct numa_node *get_numa_node(int nodeid);
/*
* Package functions
*/
#define package_numa_node(p) ((p)->numa_node)
extern void for_each_package(GList *list, void (*cb)(struct package *p, void *data), void *data);
/*
* cache_domain functions
*/
#define cache_domain_package(c) ((c)->package)
#define cache_domain_numa_node(c) (package_numa_node(cache_domain_package((c))))
extern void for_each_cache_domain(GList *list, void (*cb)(struct cache_domain *c, void *data), void *data);
/*
* cpu core functions
......@@ -73,15 +76,19 @@ extern void add_package_to_node(struct package *p, int nodeid);
#define cpu_cache_domain(cpu) ((cpu)->cache_domain)
#define cpu_package(cpu) (cache_domain_package(cpu_cache_domain((cpu))))
#define cpu_numa_node(cpu) (package_numa_node(cache_domain_package(cpu_cache_domain((cpu)))))
extern void for_each_cpu_core(GList *list, void (*cb)(struct cpu_core *c, void *data), void *data);
/*
* irq db functions
*/
extern void rebuild_irq_db(void);
extern void free_irq_db(void);
extern int set_irq_integer_prop(int irq, enum irq_prop prop, int val);
extern int find_irq_integer_prop(int irq, enum irq_prop prop);
extern cpumask_t find_irq_cpumask_prop(int irq, enum irq_prop prop);
extern void for_each_irq(void (*cb)(int irq));
extern void for_each_irq(GList *list, void (*cb)(struct irq_info *info, void *data), void *data);
extern struct irq_info *get_irq_info(int irq);
extern void migrate_irq(GList **from, GList **to, struct irq_info *info);
extern struct irq_info *add_misc_irq(int irq);
#define irq_numa_node(irq) ((irq)->numa_node)
#endif
......@@ -33,11 +33,9 @@
#include "types.h"
#include "irqbalance.h"
GList *interrupts;
void get_affinity_hint(struct interrupt *irq, int number)
void get_affinity_hint(struct irq_info *irq, int number)
{
char buf[PATH_MAX];
cpumask_t tempmask;
......@@ -55,260 +53,46 @@ void get_affinity_hint(struct interrupt *irq, int number)
}
cpumask_parse_user(line, strlen(line), tempmask);
if (!__cpus_full(&tempmask, num_possible_cpus()))
irq->node_mask = tempmask;
irq->affinity_hint = tempmask;
fclose(file);
free(line);
}
/*
* This function classifies and reads various things from /proc about a specific irq
*/
static void investigate(struct interrupt *irq, int number)
void build_workload(struct irq_info *info, void *unused __attribute__((unused)))
{
DIR *dir;
struct dirent *entry;
char *c, *c2;
int nr , count = 0, can_set = 1;
char buf[PATH_MAX];
sprintf(buf, "/proc/irq/%i", number);
dir = opendir(buf);
do {
entry = readdir(dir);
if (!entry)
break;
if (strcmp(entry->d_name,"smp_affinity")==0) {
char *line = NULL;
size_t size = 0;
FILE *file;
sprintf(buf, "/proc/irq/%i/smp_affinity", number);
file = fopen(buf, "r+");
if (!file)
continue;
if (getline(&line, &size, file)==0) {
free(line);
fclose(file);
continue;
}
cpumask_parse_user(line, strlen(line), irq->mask);
/*
* Check that we can write the affinity, if
* not take it out of the list.
*/
fputs(line, file);
if (fclose(file) && errno == EIO)
can_set = 0;
free(line);
} else if (strcmp(entry->d_name,"allowed_affinity")==0) {
char *line = NULL;
size_t size = 0;
FILE *file;
sprintf(buf, "/proc/irq/%i/allowed_affinity", number);
file = fopen(buf, "r");
if (!file)
continue;
if (getline(&line, &size, file)==0) {
free(line);
fclose(file);
continue;
}
cpumask_parse_user(line, strlen(line), irq->allowed_mask);
fclose(file);
free(line);
} else if (strcmp(entry->d_name,"affinity_hint")==0) {
get_affinity_hint(irq, number);
} else {
irq->class = find_irq_integer_prop(irq->number, IRQ_CLASS);
}
} while (entry);
closedir(dir);
irq->balance_level = map_class_to_level[irq->class];
for (nr = 0; nr < NR_CPUS; nr++)
if (cpu_isset(nr, irq->allowed_mask))
count++;
/* if there is no choice in the allowed mask, don't bother to balance */
if ((count<2) || (can_set == 0))
irq->balance_level = BALANCE_NONE;
/* next, check the IRQBALANCE_BANNED_INTERRUPTS env variable for blacklisted irqs */
c = c2 = getenv("IRQBALANCE_BANNED_INTERRUPTS");
if (!c)
return;
do {
c = c2;
nr = strtoul(c, &c2, 10);
if (c!=c2 && nr == number)
irq->balance_level = BALANCE_NONE;
} while (c!=c2 && c2!=NULL);
}
/* Set numa node number for MSI interrupt;
* Assumes existing irq metadata
*/
void set_msi_interrupt_numa(int number)
{
GList *item;
struct interrupt *irq;
int node;
node = find_irq_integer_prop(number, IRQ_NUMA);
if (node < 0)
return;
item = g_list_first(interrupts);
while (item) {
irq = item->data;
if (irq->number == number) {
irq->node_num = node;
irq->msi = 1;
return;
}
item = g_list_next(item);
}
}
/*
* Set the number of interrupts received for a specific irq;
* create the irq metadata if there is none yet
*/
void set_interrupt_count(int number, uint64_t count)
{
GList *item;
struct interrupt *irq;
if (count < MIN_IRQ_COUNT && !one_shot_mode)
return; /* no need to track or set interrupts sources without any activity since boot
but allow for a few (20) boot-time-only interrupts */
item = g_list_first(interrupts);
while (item) {
irq = item->data;
if (irq->number == number) {
irq->count = count;
set_irq_integer_prop(number, IRQ_INT_COUNT, count);
/* see if affinity_hint changed */
get_affinity_hint(irq, number);
return;