Linux提供了多种governor供用户选择,这里以interactive为例,毕竟现在的android手机中都是采用该governor.
基于linux 3.14
以下代码若未指明位置则默认在drivers/cpufreq/cpufreq_interactive.c中.
首先需要定义一个cpufreq_governor类型的结构体用来描述interactive governor.
/*
 * Descriptor of the "interactive" governor. This is the object passed to
 * cpufreq_register_governor() from cpufreq_interactive_init().
 */
struct cpufreq_governor cpufreq_gov_interactive = {
	.owner			= THIS_MODULE,
	.name			= "interactive",
	/* Event callback through which the governor is driven. */
	.governor		= cpufreq_governor_interactive,
	/* Largest tolerated frequency-switch latency, in nanoseconds. */
	.max_transition_latency	= 10000000,
};
看一下cpufreq_governor结构体:
/*
 * Describes one frequency-scaling governor. Instances are linked into
 * cpufreq_governor_list by cpufreq_register_governor().
 */
struct cpufreq_governor {
/* Governor name; registration fails if this name is already registered. */
char name[CPUFREQ_NAME_LEN];
/* Initialization flag; reset to 0 when the governor is registered. */
int initialized;
/* Event callback, invoked with the affected policy and an event code. */
int (*governor) (struct cpufreq_policy *policy,
unsigned int event);
/*
 * NOTE(review): presumably optional hooks for reading/setting a fixed
 * speed; not used by the interactive governor shown here -- confirm.
 */
ssize_t (*show_setspeed) (struct cpufreq_policy *policy,
char *buf);
int (*store_setspeed) (struct cpufreq_policy *policy,
unsigned int freq);
unsigned int max_transition_latency; /* HW must be able to switch to
next freq faster than this value in nano secs or we
will fallback to performance governor */
/* Node that links this governor into the list of registered governors. */
struct list_head governor_list;
/* Module that provides this governor. */
struct module *owner;
};
name:governor的名字,这里被赋值为interactive
initialized:初始化标志位
max_transition_latency:注释说的很清楚了,硬件从当前频率切换到下一个频率时所用的时间必须比max_transition_latency规定的时间小,否则governor将切换到performance.该数值以纳秒为单位.
governor_list:所有注册的governor都会被add到这个链表里面。
governor:这个callback用于控制governor的行为,比较重要,是分析governor的一个切入点,后面会详解.
好了,现在我们静态的定义了一个interactive governor,在governor工作之前还要做一些初始化工作
/*
 * Module init for the interactive governor.
 *
 * Prepares the per-CPU state of every possible CPU (both timers, both
 * spinlocks and the enable rw-semaphore), initializes the file-level locks,
 * creates and wakes the "cfinteractive" kernel thread as a SCHED_FIFO task,
 * and finally registers the governor.
 *
 * Returns the PTR_ERR() value if the kernel thread cannot be created,
 * otherwise whatever cpufreq_register_governor() returns.
 */
static int __init cpufreq_interactive_init(void)
{
	unsigned int i;
	struct cpufreq_interactive_cpuinfo *pcpu;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

	/* Initialize per-cpu timers */
	for_each_possible_cpu(i) {
		pcpu = &per_cpu(cpuinfo, i);

		/* Deferrable timer; the CPU number is its callback argument. */
		init_timer_deferrable(&pcpu->cpu_timer);
		pcpu->cpu_timer.function = cpufreq_interactive_timer;
		pcpu->cpu_timer.data = i;

		/* Regular (non-deferrable) timer with a no-op handler. */
		init_timer(&pcpu->cpu_slack_timer);
		pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;

		spin_lock_init(&pcpu->load_lock);
		spin_lock_init(&pcpu->target_freq_lock);
		init_rwsem(&pcpu->enable_sem);
	}

	spin_lock_init(&speedchange_cpumask_lock);
	mutex_init(&gov_lock);

	speedchange_task =
		kthread_create(cpufreq_interactive_speedchange_task, NULL,
			       "cfinteractive");
	if (IS_ERR(speedchange_task))
		return PTR_ERR(speedchange_task);

	/* Pass the address of the local sched_param (was garbled as "¶m"). */
	sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
	/* Take an extra reference on the task struct. */
	get_task_struct(speedchange_task);

	/* NB: wake up so the thread does not look hung to the freezer */
	wake_up_process(speedchange_task);

	return cpufreq_register_governor(&cpufreq_gov_interactive);
}
遍历可能的CPU
get到每个CPU的cpuinfo成员
初始化可延时定时器
设置定时器的function,定时器超时时会调用该函数
设置定时器的data,这里表示CPU ID
初始化slack定时器
设置该定时器的function,定时器超时时会调用该函数
初始化两个自旋锁:load_lock(保护其后的4个负载统计字段)和target_freq_lock(保护target_freq)
初始化读写信号量enable_sem
创建一个名为cfinteractive的内核线程,线程函数为cpufreq_interactive_speedchange_task,返回的进程描述符用speedchange_task保存
设置该线程的调度策略和调度参数
该线程的引用计数加1
唤醒speedchange_task
调用cpufreq_register_governor注册interactive governor
drivers/cpufreq/cpufreq.c
/* List of registered governors; cpufreq_register_governor() adds to it. */
static LIST_HEAD(cpufreq_governor_list);
/*
 * Register a governor with the cpufreq core.
 *
 * The governor's "initialized" flag is cleared and, unless a governor with
 * the same name is already known, the governor is added to
 * cpufreq_governor_list. The lookup and the insertion both happen under
 * cpufreq_governor_mutex.
 *
 * Returns 0 on success, -EINVAL for a NULL governor, -ENODEV when cpufreq
 * is disabled, and -EBUSY when the name is already registered.
 */
int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int ret = -EBUSY;

	if (governor == NULL)
		return -EINVAL;

	if (cpufreq_disabled())
		return -ENODEV;

	mutex_lock(&cpufreq_governor_mutex);

	governor->initialized = 0;
	if (!__find_governor(governor->name)) {
		/* Name is free: link the governor into the global list. */
		list_add(&governor->governor_list, &cpufreq_governor_list);
		ret = 0;
	}

	mutex_unlock(&cpufreq_governor_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
cpufreq_governor_list用来保存已注册的governor
__find_governor会在cpufreq_governor_list中遍历寻找是否有与需要register的governor重名的governor,如果没有则将该governor添加到cpufreq_governor_list中
好的,简单介绍了一下governor的定义,初始化,注册。
现在我们已经拥有了一个interactive governor,CPUFREQ core如果想操作governor进行选频,那么interactive governor必须对外提供一个interface以供调用,这就是cpufreq_governor结构体中的governor callback,下面来以这个interface为切入点分析governor是如何工作的.
The governor->governor callback is called with the current (or to-be-set)
cpufreq_policy struct for that CPU, and an unsigned int event. The
following events are currently defined:
CPUFREQ_GOV_START: This governor shall start its duty for the CPU
policy->cpu
CPUFREQ_GOV_STOP: This governor shall end its duty for the CPU
policy->cpu
CPUFREQ_GOV_LIMITS: The limits for CPU policy->cpu have changed to
policy->min and policy->max.
在前面的定义中有
.governor = cpufreq_governor_interactive,
下面来看一下cpufreq_governor_interactive,分段分析:
/*
 * Event callback of the interactive governor (installed as
 * cpufreq_gov_interactive.governor).
 *
 * @policy: cpufreq policy the event applies to
 * @event:  event code (the events listed above are CPUFREQ_GOV_START,
 *          CPUFREQ_GOV_STOP and CPUFREQ_GOV_LIMITS)
 */
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
unsigned int event)
{
int rc;
unsigned int j;
/* Per-CPU governor state. */
struct cpufreq_interactive_cpuinfo *pcpu;
struct cpufreq_frequency_table *freq_table;
/* Tunables used for this call; selected further down in the function. */
struct cpufreq_interactive_tunables *tunables;
unsigned long flags;
定义了一堆变量:pcpu描述了cpu相关信息,结构体如下,用到的时候再看。
/*
 * Per-CPU state of the interactive governor. The init code sets up one
 * instance for every possible CPU via per_cpu(cpuinfo, cpu).
 */
struct cpufreq_interactive_cpuinfo {
/* Deferrable timer; its handler is cpufreq_interactive_timer(). */
struct timer_list cpu_timer;
/* Regular timer; its handler is cpufreq_interactive_nop_timer(). */
struct timer_list cpu_slack_timer;
spinlock_t load_lock; /* protects the next 4 fields */
u64 time_in_idle;
u64 time_in_idle_timestamp;
u64 cputime_speedadj;
u64 cputime_speedadj_timestamp;
/* Policy and frequency table associated with this CPU. */
struct cpufreq_policy *policy;
struct cpufreq_frequency_table *freq_table;
spinlock_t target_freq_lock; /*protects target freq */
unsigned int target_freq;
unsigned int floor_freq;
unsigned int max_freq;
u64 floor_validate_time;
u64 hispeed_validate_time;
/*
 * NOTE(review): presumably enable_sem guards governor_enabled, which is
 * non-zero while the governor is active on this CPU -- confirm.
 */
struct rw_semaphore enable_sem;
int governor_enabled;
};
freq_table表示频率表,结构体如下,你会发现这是一个node,每个node代表一个频点,很多node关联在一起就成了一个table:
/*
 * One entry of a CPU frequency table; each entry describes a single
 * frequency point.
 */
struct cpufreq_frequency_table {
unsigned int driver_data; /* driver specific data, not used by core */
unsigned int frequency; /* kHz - doesn't need to be in ascending
* order */
};
struct cpufreq_interactive_tunables *tunables;这个结构体很重要,贯穿了整个governor callback,先给出结构体,接下来在函数中边看边分析。
/*
 * Tunable parameters of the interactive governor. Either one instance per
 * policy (policy->governor_data) or a single shared instance
 * (common_tunables) is used, depending on have_governor_per_policy().
 */
struct cpufreq_interactive_tunables {
/* NOTE(review): presumably the number of policies using this set -- confirm. */
int usage_count;
/* Hi speed to bump to from lo speed when load burst (default max) */
unsigned int hispeed_freq;
/* Go to hi speed when CPU load at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 99
unsigned long go_hispeed_load;
/* Target load. Lower values result in higher CPU speeds. */
spinlock_t target_loads_lock;
unsigned int *target_loads;
int ntarget_loads;
/*
* The minimum amount of time to spend at a frequency before we can ramp
* down.
*/
#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
unsigned long min_sample_time;
/*
* The sample rate of the timer used to increase frequency
*/
unsigned long timer_rate;
/*
* Wait this long before raising speed above hispeed, by default a
* single timer interval.
*/
spinlock_t above_hispeed_delay_lock;
unsigned int *above_hispeed_delay;
int nabove_hispeed_delay;
/* Non-zero means indefinite speed boost active */
int boost_val;
/* Duration of a boost pulse in usecs */
int boostpulse_duration_val;
/* End time of boost pulse in ktime converted to usecs */
u64 boostpulse_endtime;
bool boosted;
/*
* Max additional time to wait in idle, beyond timer_rate, at speeds
* above minimum before wakeup to reduce speed, or -1 if unnecessary.
*/
#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
int timer_slack_val;
/* NOTE(review): presumably counts I/O wait time as busy time when true -- confirm. */
bool io_is_busy;
};
回到cpufreq_governor_interactive函数,
if (have_governor_per_policy())
tunables = policy->governor_data;
else
tunables = common_tunables;
have_governor_per_policy判断是否每个policy都有自己的governor,我的项目中policy都采用interactive,所以这里tunables被赋值为common_tunables。
common_tunables被定义为:
/*
 * For cases where we have single governor instance for system: this shared
 * set of tunables is used whenever have_governor_per_policy() is false.
 */
static struct cpufreq_interactive_tunables *common_tunables;
但是没有分配内存和初始