// SPDX-License-Identifier: GPL-2.0

// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>

#include <linux/kernel.h> | |

#include <linux/percpu.h> | |

#include <linux/slab.h> | |

#include <linux/static_key.h> | |

#include <linux/interrupt.h> | |

#include <linux/idr.h> | |

#include <linux/irq.h> | |

#include <linux/math64.h> | |

#include <trace/events/irq.h> | |

#include "internals.h" | |

DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); | |

DEFINE_PER_CPU(struct irq_timings, irq_timings); | |

struct irqt_stat { | |

u64 next_evt; | |

u64 last_ts; | |

u64 variance; | |

u32 avg; | |

u32 nr_samples; | |

int anomalies; | |

int valid; | |

}; | |

/*
 * Maps an interrupt number to its per-CPU struct irqt_stat allocation.
 * Entries are inserted by irq_timings_alloc() with the interrupt number
 * as id and dropped by irq_timings_free().
 */
static DEFINE_IDR(irqt_stats);

void irq_timings_enable(void) | |

{ | |

static_branch_enable(&irq_timing_enabled); | |

} | |

void irq_timings_disable(void) | |

{ | |

static_branch_disable(&irq_timing_enabled); | |

} | |

/** | |

* irqs_update - update the irq timing statistics with a new timestamp | |

* | |

* @irqs: an irqt_stat struct pointer | |

* @ts: the new timestamp | |

* | |

* The statistics are computed online, in other words, the code is | |

* designed to compute the statistics on a stream of values rather | |

* than doing multiple passes on the values to compute the average, | |

* then the variance. The integer division introduces a loss of | |

* precision but with an acceptable error margin regarding the results | |

* we would have with the double floating precision: we are dealing | |

* with nanosec, so big numbers, consequently the mantisse is | |

* negligeable, especially when converting the time in usec | |

* afterwards. | |

* | |

* The computation happens at idle time. When the CPU is not idle, the | |

* interrupts' timestamps are stored in the circular buffer, when the | |

* CPU goes idle and this routine is called, all the buffer's values | |

* are injected in the statistical model continuying to extend the | |

* statistics from the previous busy-idle cycle. | |

* | |

* The observations showed a device will trigger a burst of periodic | |

* interrupts followed by one or two peaks of longer time, for | |

* instance when a SD card device flushes its cache, then the periodic | |

* intervals occur again. A one second inactivity period resets the | |

* stats, that gives us the certitude the statistical values won't | |

* exceed 1x10^9, thus the computation won't overflow. | |

* | |

* Basically, the purpose of the algorithm is to watch the periodic | |

* interrupts and eliminate the peaks. | |

* | |

* An interrupt is considered periodically stable if the interval of | |

* its occurences follow the normal distribution, thus the values | |

* comply with: | |

* | |

* avg - 3 x stddev < value < avg + 3 x stddev | |

* | |

* Which can be simplified to: | |

* | |

* -3 x stddev < value - avg < 3 x stddev | |

* | |

* abs(value - avg) < 3 x stddev | |

* | |

* In order to save a costly square root computation, we use the | |

* variance. For the record, stddev = sqrt(variance). The equation | |

* above becomes: | |

* | |

* abs(value - avg) < 3 x sqrt(variance) | |

* | |

* And finally we square it: | |

* | |

* (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 | |

* | |

* (value - avg) x (value - avg) < 9 x variance | |

* | |

* Statistically speaking, any values out of this interval is | |

* considered as an anomaly and is discarded. However, a normal | |

* distribution appears when the number of samples is 30 (it is the | |

* rule of thumb in statistics, cf. "30 samples" on Internet). When | |

* there are three consecutive anomalies, the statistics are resetted. | |

* | |

*/ | |

static void irqs_update(struct irqt_stat *irqs, u64 ts) | |

{ | |

u64 old_ts = irqs->last_ts; | |

u64 variance = 0; | |

u64 interval; | |

s64 diff; | |

/* | |

* The timestamps are absolute time values, we need to compute | |

* the timing interval between two interrupts. | |

*/ | |

irqs->last_ts = ts; | |

/* | |

* The interval type is u64 in order to deal with the same | |

* type in our computation, that prevent mindfuck issues with | |

* overflow, sign and division. | |

*/ | |

interval = ts - old_ts; | |

/* | |

* The interrupt triggered more than one second apart, that | |

* ends the sequence as predictible for our purpose. In this | |

* case, assume we have the beginning of a sequence and the | |

* timestamp is the first value. As it is impossible to | |

* predict anything at this point, return. | |

* | |

* Note the first timestamp of the sequence will always fall | |

* in this test because the old_ts is zero. That is what we | |

* want as we need another timestamp to compute an interval. | |

*/ | |

if (interval >= NSEC_PER_SEC) { | |

memset(irqs, 0, sizeof(*irqs)); | |

irqs->last_ts = ts; | |

return; | |

} | |

/* | |

* Pre-compute the delta with the average as the result is | |

* used several times in this function. | |

*/ | |

diff = interval - irqs->avg; | |

/* | |

* Increment the number of samples. | |

*/ | |

irqs->nr_samples++; | |

/* | |

* Online variance divided by the number of elements if there | |

* is more than one sample. Normally the formula is division | |

* by nr_samples - 1 but we assume the number of element will be | |

* more than 32 and dividing by 32 instead of 31 is enough | |

* precise. | |

*/ | |

if (likely(irqs->nr_samples > 1)) | |

variance = irqs->variance >> IRQ_TIMINGS_SHIFT; | |

/* | |

* The rule of thumb in statistics for the normal distribution | |

* is having at least 30 samples in order to have the model to | |

* apply. Values outside the interval are considered as an | |

* anomaly. | |

*/ | |

if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { | |

/* | |

* After three consecutive anomalies, we reset the | |

* stats as it is no longer stable enough. | |

*/ | |

if (irqs->anomalies++ >= 3) { | |

memset(irqs, 0, sizeof(*irqs)); | |

irqs->last_ts = ts; | |

return; | |

} | |

} else { | |

/* | |

* The anomalies must be consecutives, so at this | |

* point, we reset the anomalies counter. | |

*/ | |

irqs->anomalies = 0; | |

} | |

/* | |

* The interrupt is considered stable enough to try to predict | |

* the next event on it. | |

*/ | |

irqs->valid = 1; | |

/* | |

* Online average algorithm: | |

* | |

* new_average = average + ((value - average) / count) | |

* | |

* The variance computation depends on the new average | |

* to be computed here first. | |

* | |

*/ | |

irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); | |

/* | |

* Online variance algorithm: | |

* | |

* new_variance = variance + (value - average) x (value - new_average) | |

* | |

* Warning: irqs->avg is updated with the line above, hence | |

* 'interval - irqs->avg' is no longer equal to 'diff' | |

*/ | |

irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); | |

/* | |

* Update the next event | |

*/ | |

irqs->next_evt = ts + irqs->avg; | |

} | |

/** | |

* irq_timings_next_event - Return when the next event is supposed to arrive | |

* | |

* During the last busy cycle, the number of interrupts is incremented | |

* and stored in the irq_timings structure. This information is | |

* necessary to: | |

* | |

* - know if the index in the table wrapped up: | |

* | |

* If more than the array size interrupts happened during the | |

* last busy/idle cycle, the index wrapped up and we have to | |

* begin with the next element in the array which is the last one | |

* in the sequence, otherwise it is a the index 0. | |

* | |

* - have an indication of the interrupts activity on this CPU | |

* (eg. irq/sec) | |

* | |

* The values are 'consumed' after inserting in the statistical model, | |

* thus the count is reinitialized. | |

* | |

* The array of values **must** be browsed in the time direction, the | |

* timestamp must increase between an element and the next one. | |

* | |

* Returns a nanosec time based estimation of the earliest interrupt, | |

* U64_MAX otherwise. | |

*/ | |

u64 irq_timings_next_event(u64 now) | |

{ | |

struct irq_timings *irqts = this_cpu_ptr(&irq_timings); | |

struct irqt_stat *irqs; | |

struct irqt_stat __percpu *s; | |

u64 ts, next_evt = U64_MAX; | |

int i, irq = 0; | |

/* | |

* This function must be called with the local irq disabled in | |

* order to prevent the timings circular buffer to be updated | |

* while we are reading it. | |

*/ | |

lockdep_assert_irqs_disabled(); | |

/* | |

* Number of elements in the circular buffer: If it happens it | |

* was flushed before, then the number of elements could be | |

* smaller than IRQ_TIMINGS_SIZE, so the count is used, | |

* otherwise the array size is used as we wrapped. The index | |

* begins from zero when we did not wrap. That could be done | |

* in a nicer way with the proper circular array structure | |

* type but with the cost of extra computation in the | |

* interrupt handler hot path. We choose efficiency. | |

* | |

* Inject measured irq/timestamp to the statistical model | |

* while decrementing the counter because we consume the data | |

* from our circular buffer. | |

*/ | |

for (i = irqts->count & IRQ_TIMINGS_MASK, | |

irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); | |

irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { | |

irq = irq_timing_decode(irqts->values[i], &ts); | |

s = idr_find(&irqt_stats, irq); | |

if (s) { | |

irqs = this_cpu_ptr(s); | |

irqs_update(irqs, ts); | |

} | |

} | |

/* | |

* Look in the list of interrupts' statistics, the earliest | |

* next event. | |

*/ | |

idr_for_each_entry(&irqt_stats, s, i) { | |

irqs = this_cpu_ptr(s); | |

if (!irqs->valid) | |

continue; | |

if (irqs->next_evt <= now) { | |

irq = i; | |

next_evt = now; | |

/* | |

* This interrupt mustn't use in the future | |

* until new events occur and update the | |

* statistics. | |

*/ | |

irqs->valid = 0; | |

break; | |

} | |

if (irqs->next_evt < next_evt) { | |

irq = i; | |

next_evt = irqs->next_evt; | |

} | |

} | |

return next_evt; | |

} | |

void irq_timings_free(int irq) | |

{ | |

struct irqt_stat __percpu *s; | |

s = idr_find(&irqt_stats, irq); | |

if (s) { | |

free_percpu(s); | |

idr_remove(&irqt_stats, irq); | |

} | |

} | |

int irq_timings_alloc(int irq) | |

{ | |

struct irqt_stat __percpu *s; | |

int id; | |

/* | |

* Some platforms can have the same private interrupt per cpu, | |

* so this function may be be called several times with the | |

* same interrupt number. Just bail out in case the per cpu | |

* stat structure is already allocated. | |

*/ | |

s = idr_find(&irqt_stats, irq); | |

if (s) | |

return 0; | |

s = alloc_percpu(*s); | |

if (!s) | |

return -ENOMEM; | |

idr_preload(GFP_KERNEL); | |

id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); | |

idr_preload_end(); | |

if (id < 0) { | |

free_percpu(s); | |

return id; | |

} | |

return 0; | |

} |