LINUX-4.4 内核启动流程分析

tech2022-08-11  149

本文共分为 5 部分。其中第 5 部分是手工绘制的内核启动函数调用关系图,可以结合函数名和文件路径,清晰地梳理内核的启动路线。

(1). arch/arm/kernel/vmlinux.lds.S 是 kernel 的链接脚本文件

/*
 * Excerpt of the kernel linker script.
 *
 * ENTRY(stext) names `stext` as the image entry symbol.  The SECTIONS
 * block places .head.text first (with `_text` marking its start),
 * followed by the main .text output section.
 */
ENTRY(stext)

SECTIONS
{
	/* Start of the image: `_text` is set to the current location. */
	.head.text : {
		_text = .;
		HEAD_TEXT
	}

	.text : {			/* Real text segment */
		_stext = .;		/* Text and read-only data */
		IDMAP_TEXT
		/* Bracket the .exception.text input sections with symbols. */
		__exception_text_start = .;
		*(.exception.text)
		__exception_text_end = .;
		IRQENTRY_TEXT
		TEXT_TEXT
		SCHED_TEXT
		LOCK_TEXT
		KPROBES_TEXT
		*(.gnu.warning)
		*(.glue_7)
		*(.glue_7t)
		. = ALIGN(4);
		*(.got)			/* Global offset table */
		ARM_CPU_KEEP(PROC_INFO)
	}
}

(2). arch/arm/kernel/head.S 是 kernel 的启动文件

/*
 * stext - kernel entry point (excerpt; the author elided part of the body).
 *
 * Sequence visible in this excerpt:
 *   1. optionally install the hyp stub, then force SVC mode with all
 *      interrupts masked;
 *   2. read the processor id into r9 and look up the matching procinfo
 *      (r5, copied to r10); branch to __error_p when none is found;
 *   3. with CONFIG_ARM_LPAE, check ID_MMFR0 for long-descriptor support;
 *   4. vet the atags pointer, apply the SMP-on-UP and phys-virt fixups when
 *      configured, and create the initial page tables;
 *   5. load r13 with __mmap_switched, set lr to local label 1, and call the
 *      processor init function found through PROCINFO_INITFUNC; label 1
 *      then branches to __enable_mmu.
 *
 * Each instruction is on its own line: `@` starts a comment that runs to
 * end of line, and preprocessor directives must begin a line.
 */
	__HEAD		/* #define __HEAD  .section ".head.text","ax" */
ENTRY(stext)
 ARM_BE8(setend	be )			@ ensure we are in BE8 mode

 THUMB(	badr	r9, 1f		)	@ Kernel is always entered in ARM.
 THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
 THUMB(	.thumb			)	@ switch to Thumb now.
 THUMB(1:			)

#ifdef CONFIG_ARM_VIRT_EXT
	bl	__hyp_stub_install
#endif
	@ ensure svc mode and all interrupts masked
	safe_svcmode_maskall r9

	mrc	p15, 0, r9, c0, c0		@ get processor id
	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
	movs	r10, r5				@ invalid processor (r5=0)?
 THUMB( it	eq )				@ force fixup-able long branch encoding
	beq	__error_p			@ yes, error 'p'

#ifdef CONFIG_ARM_LPAE
	mrc	p15, 0, r3, c0, c1, 4		@ read ID_MMFR0
	and	r3, r3, #0xf			@ extract VMSA support
	cmp	r3, #5				@ long-descriptor translation table format?
 THUMB( it	lo )				@ force fixup-able long branch encoding
	blo	__error_lpae			@ only classic page table format
#endif

	/* ... (code omitted from this excerpt by the article's author) */

	bl	__vet_atags
#ifdef CONFIG_SMP_ON_UP
	bl	__fixup_smp
#endif
#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
	bl	__fixup_pv_table
#endif
	bl	__create_page_tables

	/* __mmap_switched is defined in head_common.S. */
	ldr	r13, =__mmap_switched		@ address to jump to after
						@ mmu has been enabled
	badr	lr, 1f				@ return (PIC) address
#ifdef CONFIG_ARM_LPAE
	mov	r5, #0				@ high TTBR0
	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
#else
	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
#endif
	/* r12 = r10 + word at [r10 + PROCINFO_INITFUNC]; jump there. */
	ldr	r12, [r10, #PROCINFO_INITFUNC]
	add	r12, r12, r10
	ret	r12
1:	b	__enable_mmu

(3). 文件 arch/arm/kernel/head_common.S

/*
 * __mmap_switched - final assembly stage before entering C code.
 *
 * Loads four words from __mmap_switched_data into r4-r7, then:
 *   - if r4 != r5, copies words from [r4] to [r5] until r5 reaches r6
 *     (the data segment copy);
 *   - stores zero from r6 up to (not including) r7 (the BSS clear);
 *   - loads five more words from the table into r4-r7 and sp, and uses
 *     r4-r7 as destinations to save the processor ID (r9), machine type
 *     (r1), atags pointer (r2) and, when r7 is non-zero, the control
 *     register value (r0);
 *   - branches to start_kernel() in init/main.c.
 *
 * NOTE(review): the layout of __mmap_switched_data is not shown in this
 * excerpt; confirm the exact symbols against head_common.S.
 *
 * Each instruction is on its own line: `@` starts a comment that runs to
 * end of line.
 */
	__INIT		/* #define __INIT  .section ".init.text","ax" */
__mmap_switched:
	adr	r3, __mmap_switched_data

	ldmia	r3!, {r4, r5, r6, r7}
	cmp	r4, r5				@ Copy data segment if needed
1:	cmpne	r5, r6
	ldrne	fp, [r4], #4
	strne	fp, [r5], #4
	bne	1b

	mov	fp, #0				@ Clear BSS (and zero fp)
1:	cmp	r6, r7
	strcc	fp, [r6],#4
	bcc	1b

 ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
 THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
 THUMB(	ldr	sp, [r3, #16]		)
	str	r9, [r4]			@ Save processor ID
	str	r1, [r5]			@ Save machine type
	str	r2, [r6]			@ Save atags pointer
	cmp	r7, #0
	strne	r0, [r7]			@ Save control register values
	b	start_kernel			@ jump to start_kernel(void) in init/main.c
ENDPROC(__mmap_switched)

(4). init/main.c 是内核初始化程序(start_kernel 所在文件)

/*
 * start_kernel - architecture-independent kernel entry point.
 *
 * Reached from the assembly boot code (__mmap_switched branches here).
 * Runs the early initialisation calls in a fixed order with interrupts
 * disabled, parses the kernel command line, enables interrupts once the
 * IRQ, timer and scheduler setup is done, brings up the console and the
 * remaining core subsystems, and finally hands over to rest_init().
 *
 * The call order is significant; the comments inside the body record the
 * ordering constraints.  Takes no arguments and returns nothing.
 *
 * Preprocessor directives are kept on their own lines: a directive must
 * begin a line to be valid C.
 */
asmlinkage __visible void __init start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	lockdep_init();
	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();

	/*
	 * Set up the initial canary ASAP:
	 */
	boot_init_stack_canary();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	setup_arch(&command_line);	/* author's note: handles the parameters passed in by U-Boot */
	mm_init_cpumask(&init_mm);
	setup_command_line(command_line);	/* command line */
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL, NULL);
	page_alloc_init();

	pr_notice("Kernel command line: %s\n", boot_command_line);
	/* parameters may set static keys */
	jump_label_init();
	parse_early_param();
	/* author's note: handles the arguments (bootargs) passed in by U-Boot */
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, NULL, &unknown_bootoption);
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   NULL, set_init_arg);

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);
	pidhash_init();
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	idr_init_cache();
	rcu_init();

	/* trace_printk() and trace points may be used after this */
	trace_init();

	context_tracking_init();
	radix_tree_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	sched_clock_postinit();
	perf_event_init();
	profile_init();
	call_function_init();
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	/* Drop the initrd if its first page lies below min_low_pfn. */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_ext_init();
	debug_objects_mem_init();
	kmemleak_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
	acpi_early_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init();
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init();
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
	proc_root_init();
	nsfs_init();
	cpuset_init();
	cgroup_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	acpi_subsystem_init();
	sfi_init_late();

	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
		efi_late_init();
		efi_free_boot_services();
	}

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}

(5). 内核启动调度路线 可参考函数名和路径查看调用关系.

start_kernel(void)    // init/main.c
|
|-setup_arch(&command_line);    // 处理 uboot 传递的 bootcmd 参数
|
|-parse_args(.....);    // 处理 uboot 传递的 bootargs 参数
|    unknown_bootoption
|      obsolete_checksetup()    // 处理命令参数 'init=' ...
|
|-rest_init();    // init/main.c
  |
  |-kernel_init();    // 由 kernel_thread(kernel_init, NULL, CLONE_FS); 以内核线程方式启动
  | |
  | |-kernel_init_freeable();    // init/main.c
  | | | /* Wait until kthreadd is all set-up. */
  | | |-wait_for_completion(&kthreadd_done);
  | | | /* init can allocate pages on any node */
  | | |-set_mems_allowed(node_states[N_MEMORY]);
  | | | /* init can run on any cpu. */
  | | |-set_cpus_allowed_ptr(current, cpu_all_mask);
  | | |-cad_pid = task_pid(current);
  | | |-smp_prepare_cpus(setup_max_cpus);
  | | |- ....    /* 省略部分代码 */
  | | |-smp_init();
  | | |-sched_init_smp();
  | | |-page_alloc_init_late();
  | | |
  | | |-do_basic_setup();    // system 基础配置项
  | | | | cpuset_init_smp();    // smp init
  | | | | shmem_init();    // single
  | | | |-driver_init();    // driver/base/init.c 驱动初始化
  | | | | |
  | | | | |-devtmpfs_init();    /* 注册设备类型文件系统 */
  | | | | |   err = register_filesystem(&dev_fs_type);
  | | | | |   thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
  | | | | |   /* 运行 devtmpfsd() 函数,创建文件系统 */
  | | | | |-devices_init();
  | | | | |   kset_create_and_add("devices", &device_uevent_ops, NULL);
  | | | | |   kobject_create_and_add("dev", NULL);
  | | | | |   kobject_create_and_add("block", dev_kobj);
  | | | | |   kobject_create_and_add("char", dev_kobj);
  | | | | |
  | | | | |-buses_init();
  | | | | |   kset_create_and_add("bus", &bus_uevent_ops, NULL);
  | | | | |   kset_create_and_add("system", NULL, &devices_kset->kobj);
  | | | | |
  | | | | |-classes_init();
  | | | | |   kset_create_and_add("class", NULL, NULL);
  | | | | |
  | | | | |-firmware_init();
  | | | | |   firmware_kobj = kobject_create_and_add("firmware", NULL);
  | | | | |
  | | | | |-hypervisor_init();
  | | | | |   kobject_create_and_add("hypervisor", NULL);
  | | | | |
  | | | | |-platform_bus_init();
  | | | | |   early_platform_cleanup();
  | | | | |   device_register(&platform_bus);    // platform_dev 设备初始化
  | | | | |   bus_register(&platform_bus_type);    // platform_bus 初始化
  | | | | |   of_platform_register_reconfig_notifier();
  | | | | |
  | | | | |-cpu_dev_init();
  | | | | |   subsys_system_register(&cpu_subsys, cpu_root_attr_groups)
  | | | | |   cpu_dev_register_generic();
  | | | | |   cpu_register_vulnerabilities();
  | | | | |
  | | | | |-memory_dev_init();
  | | | | |   subsys_system_register(&memory_subsys,
  | | | | |                          memory_root_attr_groups);
  | | | | |-container_dev_init();
  | | | | |   subsys_system_register(&container_subsys, NULL);
  | | | | |
  | | | | |-of_core_init();
  | | | |     kset_create_and_add("devicetree", NULL, firmware_kobj);
  | | | |     proc_symlink("device-tree", NULL,
  | | | |                  "/sys/firmware/devicetree/base");
  | | | |-init_irq_proc();
  | | | |   proc_mkdir("irq", NULL);
  | | | |   register_irq_proc(irq, desc);
  | | | |
  | | | |-do_ctors();
  | | | | usermodehelper_enable();
  | | | |
  | | | |-do_initcalls();
  | | | |   do_initcall_level(level);
  | | | |     for (fn = initcall_levels[level];
  | | | |          fn < initcall_levels[level+1]; fn++)
  | | | |       do_one_initcall(*fn);
  | | | |
  | | | |-random_int_secret_init();
  | | | /* Open the /dev/console on the rootfs, this should never fail */
  | | |-sys_open((const char __user *) "/dev/console", O_RDWR, 0)
  | | | /* check if there is an early userspace init. */
  | | |-ramdisk_execute_command = "/init";
  | | |-sys_access((const char __user *) ramdisk_execute_command, 0)
  | | | /* If yes, let it do all the work */
  | | |-prepare_namespace();
  | | |   mount_root();    /* 挂载文件系统,启动文件管理线程 */
  | | | /* rootfs is available now, try loading the public keys and default modules */
  | | |-integrity_load_keys();
  | | |-load_default_modules();
  | | /* need to finish all async __init code before freeing the memory */
  | |-async_synchronize_full();
  | | /* 要执行的 init 程序来自启动参数 'init=';若该参数为空,则依次尝试下面的默认路径('/sbin/init' 等) */
  | |-ramdisk_execute_command = ''
  | |-run_init_process(ramdisk_execute_command);
  | |
  | |-try_to_run_init_process("/sbin/init")    /* 尝试执行该路径的 init 程序 */
  | |-try_to_run_init_process("/etc/init")    /* 尝试执行该路径的 init 程序 */
  | |-try_to_run_init_process("/bin/init")    /* 尝试执行该路径的 init 程序 */
  | |-try_to_run_init_process("/bin/sh")    /* 以上都失败时,退而执行 shell */
  | /* Setup a clean context for our children to inherit. create kthreadd */
  |-pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
  |
  |-kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
  |
  |-complete(&kthreadd_done);
  | /* The boot idle thread must execute schedule(),
  |  * at least once to get things moving */
  |-init_idle_bootup_task(current);
  |
  |-schedule_preempt_disabled();
  | /* Call into cpu_idle with preempt disabled */
  |-cpu_startup_entry(CPUHP_ONLINE);
  | |-arch_cpu_idle_prepare();
  | |-cpu_idle_loop();    /* Linux kernel tick thread, idle thread */
最新回复(0)