diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..ac75f570b634f --- /dev/null +++ b/README.md @@ -0,0 +1,322 @@ +# ARM 64 리눅스 커널 6.4.2 분석 + +## 커뮤니티: IAMROOT 20차 +- [www.iamroot.org][#iamroot] | IAMROOT 홈페이지 +- [jake.dothome.co.kr][#moonc] | 문c 블로그 + +[#iamroot]: http://www.iamroot.org +[#moonc]: http://jake.dothome.co.kr + +## 스터디 멤버 +- 이대로 +- 임채훈 +- 조수민 +- 최경건 + +## History + +### 77 주차 +- 2024.10.19 2명(임채훈, 최경건) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - init_unavailable_range + +### 76 주차 +- 2024.10.12 2명(임채훈, 최경건) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - init_unavailable_range + - pfn_valid + +### 75 주차 +- 2024.10.05 2명(이대로, 임채훈) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + +### 74 주차 +- 2024.09.28 2명(이대로, 임채훈) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - memmap_init_range + +### 73 주차 +- 2024.09.21 2명(임채훈, 최경건) +- free_area_init + - memmap_init + - memmap_init_range + - defer_init + +### 72 주차 +- 2024.09.07 2명(임채훈, 최경건) +- free_area_init_node + - free_area_init_core + +### 71 주차 +- 2024.08.31 2명(임채훈, 최경건) +- free_area_init_core + - zone_init_innternals + - set_pageblock_order + +### 70 주차 +- 2024.08.24 3명(이대로, 임채훈, 최경건) +- free_area_init + - free_area_init_node + - free_area_init_core + +### 69 주차 +- 2024.08.10 1명(임채훈) +- [문C 블로그 zone_sizes_init](http://jake.dothome.co.kr/free_area_init_node) 정독 + +### 68 주차 +- 2024.08.10 4명(이대로, 임채훈, 조수민, 최경건) +- free_area_init + - find_zone_movable_pfns_for_nodes + +### 67 주차 +- 2024.08.03 4명(이대로, 임채훈, 조수민, 최경건) +- sparse_init +- zone_sizes_init + +### 66 주차 +- 2024.07.27 4명(이대로, 임채훈, 조수민, 최경건) +- sparse_init +- sparse_init_nid +- __populate_section_memmap + - vmemmap_populate + - vmemmap_populate_hugepages +- 
check_usemap_section_nr ~ + +### 65 주차 +- 2024.07.20 2명(이대로, 임채훈) +- arm64_hugetlb_cma_reserve +- dma_pernuma_cma_reserve +- sparse_init ~ + +### 64 주차 +- 2024.07.13 2명(임채훈, 최경건) +- arm64_hugetlb_cma_reserve + +### 63 주차 +- 2024.07.06 2명(임채훈, 최경건) + +### 62 주차 +- 2024.06.29 2명(임채훈, 최경건) + +### 61 주차 +- 2024.06.22 3명(임채훈, 조수민, 최경건) + +### 60 주차 +- 2024.06.15 4명(이대로, 임채훈, 조수민, 최경건) + +### 59 주차 +- 2024.06.08 3명(임채훈, 조수민, 최경건) + +### 58 주차 +- 2024.06.01 1명(임채훈) + +### 57 주차 +- 2024.05.25 4명(이대로, 임채훈, 조수민, 최경건) + +### 56 주차 +- 2024.05.18 3명(이대로, 임채훈, 조수민) + +### 55 주차 +- 2024.05.11 3명(이대로, 임채훈, 조수민) + +### 54 주차 +- 2024.05.04 +- 어린이날 연휴 + +### 53 주차 +- 2024.04.27 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 52 주차 +- 2024.04.20 5명(이대로, 임채훈, 최경건, ??) + +### 51 주차 +- 2024.04.13 3명(이대로, 임채훈, 최경건) + +### 50 주차 +- 2024.04.06 4명(이대로, 임채훈, 최경건) + +### 49 주차 +- 2024.03.30 2명(박성수, 임채훈) + +### 48 주차 +- 2024.03.26 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 47 주차 +- 2024.03.23 2명(이대로, 임채훈) + +### 46 주차 +- 2024.03.16 (K, ruffalo2881, 박성수, 이대로, 임채훈, 조수민) + +### 45 주차 +- 2024.03.09 (박성수, 이대로, 임채훈, 조수민) + +### 44 주차 +- 2024.03.02 5명(박성수, 이대로, 임채훈, 조수민) + +### 43 주차 +- 2024.02.24 7명(K, 김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 42 주차 +- 2024.02.17 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 41 주차 +- 2024.02.10 +- 설 연휴 + +### 40 주차 +- 2024.02.03 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 39 주차 +- 2024.01.27 6명(김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) + +### 38 주차 +- 2024.01.20 7명(K, 박성수, 이대로, 임채훈, 최경건) + +### 37 주차 +- 2024.01.13 5명(K, 김해성, 박성수, 임채훈, 최경건) + +### 36 주차 +- 2024.01.06 6명(K, 박성수, 이대로, 임채훈, 최경건) + +### 35 주차 +- 2023.12.30 +- 새해 연휴 + +### 34 주차 +- 2023.12.23 4명 + +### 33 주차 +- 2023.12.16 3명 + +### 32 주차 +- 2023.12.09 4명 + +### 31 주차 +- 2023.12.02 6명 + +### 30 주차 +- 2023.11.25 8명 + +### 29 주차 +- 2023.11.18 8명 + +### 28 주차 +- 2023.11.11 10명 + +### 27 주차 +- 2023.11.04 9명 + +### 26 주차 +- 2023.10.28 11명 + +### 25 주차 +- 2023.10.21 10명 + +### 24 주차 +- 2023.10.14 10명 +- Arm 아키텍처의 구조와 원리 + +### 23 주차 +- 2023.10.07 12명 +- Arm 
아키텍처의 구조와 원리 + +### 22 주차 +- 2023.09.30 +- 추석 + +### 21 주차 +- 2023.09.23 12명 +- Arm 아키텍처의 구조와 원리 + +### 20 주차 +- 2023.09.16 9명 +- Arm 아키텍처의 구조와 원리 + +### 19 주차 +- 2023.09.09 11명 +- Arm 아키텍처의 구조와 원리 + +### 18 주차 +- 2023.09.02 10명 +- Arm 아키텍처의 구조와 원리 + +### 17 주차 +- 2023.08.26 10명 +- Arm 아키텍처의 구조와 원리 + +### 16 주차 +- 2023.08.19 12명 + +### 15 주차 +- 2023.08.12 12명 + +### 14 주차 +- 2023.08.05 17명 + +### 13 주차 +- 2023.07.29 19명 + +### 12 주차 +- 2023.07.22 18명 + +### 11 주차 +- 2023.07.15 22명 + +### 10 주차 +- 2023.07.08 22명 + +### 9 주차 +- 2023.07.01 22명 +- ARM Architecture + +### 8 주차 +- 2023.06.24 22명 +- ARM Architecture + +### 7 주차 +- 2023.06.17 20명 +- ARM Architecture + +### 6 주차 +- 2023.06.10 15명 +- 리눅스 커널 내부 구조 8장 + +### 5 주차 +- 2023.06.03 +- 리눅스 커널 내부 구조 + +### 4 주차 +- 2023.05.27 +- 부처님 오신날 + +### 3 주차 +- 2023.05.20 25명 +- 리눅스 커널 내부 구조 4 ~ 5장 + +### 2 주차 +- 2023.05.13 40명 +- 리눅스 커널 내부 구조 3 ~ 4장 + +### 1 주차 +- 2023.05.06 (약 47명으로 시작) +- 리눅스 커널 내부 구조 1 ~2장 diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index bdf1f6bcd0103..7e3a2389950a8 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -226,9 +226,40 @@ alternative_endif static __always_inline bool alternative_has_feature_likely(const unsigned long feature) { + /* IAMROOT20 20240427 + * ARM64_NCAPS = 88 + */ compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); + /* IAMROOT20 20240427 + * ALTERNATIVE_CB(oldinstr, feature, cb) + * __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) + * | (ARM64_ALWAYS_SYSTEM), 1, alt_cb_patch_nops) + * + * + * if cfg_enabled == 1 + * 661: + * oldinstr( "b %l[l_no]" ) + * 662: + * .pushsection .altinstructions,"a" + * + * // ALTINSTR_ENTRY_CB(ARM64_ALWAYS_SYSTEM, alt_cb_patch_nops) + * .word 661b - . label + * .word __stringify(alt_cb_patch_nops) - . 
callback + * .hword __stringify(ARM64_ALWAYS_SYSTEM) feature bit + * .byte 662b-661b source len + * .byte 664f-663f replacement len + * + * .popsection + * 663: + * 664: + * endif + * return true; + * + * l_no: + * return false + */ asm_volatile_goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 4081b8b685e97..4e1eb8165492c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -392,6 +392,11 @@ alternative_cb_end * this number conveniently equals the number of leading zeroes in * the physical address of _end. */ + /* IAMROOT20 20231021 + * reg <- 0x0000_0000_42c8_0000(_end) + * reg <- 0x0000_0000_42c8_0000 | (0x0001_0000_0000_0000 - 1) = 0x0000_0000_42c8_0000 | 0x0000_ffff_ffff_ffff = 0x0000_ffff_ffff_ffff + * reg = 16 (clz: MSB부터 0의 숫자를 카운트) + */ .macro idmap_get_t0sz, reg adrp \reg, _end orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 @@ -415,7 +420,28 @@ alternative_cb_end csel \tmp0, \tmp1, \tmp0, hi bfi \tcr, \tmp0, \pos, #3 .endm - + + /* IAMROOT20 20230916: + * __dcache_op_workaround_clean_cache cvac __idmap_text_start + * .pushsection .altinstructions, "a" + * // arm64/include/asm/alternative.h + * .word 661f - . + * .word 663f - . + * .hword ARM64_WORKAROUND_CLEAN_CACHE + * .byte 662f-661f + * .byte 664f-663f + * .popsection + * 661: + * dc cvac, __idmap_text_start + * 662: + * .subsection 1 + * 663: + * dc civac, __idmap_text_start + * 664: + * .org . - (664b-663b) + (662b-661b) + * .org . 
- (662b-661b) + (664b-663b) + * .previous + */ .macro __dcache_op_workaround_clean_cache, op, addr alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE dc \op, \addr @@ -436,7 +462,19 @@ alternative_endif * fixup: optional label to branch to on user fault * Corrupts: start, end, tmp */ + /* + * IAMROOT20 20230909: + * cache.S::dcache_clean_poc dcache_by_line_op cvac, sy, x0, x1, x2, x3 + * exam) dcache_by_myline_op(cvac, sy, __idmap_text_start, __idmap_text_end, x2, x3) + */ .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + /* + * IAMROOT20 20230909: + * tmp = linesz - 1; tmp = 63 + * start = start & ~(tmp); // start을 캐쉬라인 사이즈로 align + * dcache_op@: + * dc civac start + */ sub \tmp, \linesz, #1 bic \start, \start, \tmp .Ldcache_op\@: @@ -457,11 +495,20 @@ alternative_endif .endif .endif .endif + /* + * IAMROOT20 20230909: + * start += linesz; start += 64 + * if(start < end) goto dcache_op@; + * dsb domain; dsb sy + */ add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain +/* + * IAMROOT20_END 20230909 + */ _cond_uaccess_extable .Ldcache_op\@, \fixup .endm @@ -477,7 +524,17 @@ alternative_endif * Corrupts: start, end, tmp1, tmp2 */ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + /* + * IAMROOT20 20230916: + * tmp1 = cache_line_size 예) 64 + */ dcache_line_size \tmp1, \tmp2 + /* + * IAMROOT20 20230916: + * exam) dcache_by_line_op cvac, sy, x0, x1, x2, x3 + * x2 = 64 + * dcache_by_myline_op cvac, sy, __idmap_text_start, __idmap_text_end, x2, x3 + */ dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup .endm @@ -638,6 +695,10 @@ alternative_endif #ifdef CONFIG_ARM64_VA_BITS_52 mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) + /* IAMROOT20 20231028 + * VA=52로 사용하는데, LVA(VA=52 support)가 지원되지 않는 시스템의 경우 + * VA=48로 운영하기 위해 PGD 테이블 위치를 offset만큼 더한 주소로 적용 + */ cbnz \tmp, .Lskipoffs_\@ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET .Lskipoffs_\@ : @@ -654,7 +715,11 
@@ alternative_endif .macro phys_to_ttbr, ttbr, phys #ifdef CONFIG_ARM64_PA_BITS_52 orr \ttbr, \phys, \phys, lsr #46 + /* IAMROOT20 20231021 + * TTBR_BADDR_MASK_52 : 0x0000_ffff_ffff_fffc + */ and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 + /* IAMROOT20_END 20231021 */ /* IAMROOT20_START 20231028 */ #else mov \ttbr, \phys #endif @@ -666,9 +731,17 @@ alternative_endif * We assume \phys is 64K aligned and this is guaranteed by only * supporting this configuration with 64K pages. */ + /* + * IAMROOT20 20230916: + * pte = (phys | (phys >> 36)) & 0x0000_ffff_ffff_f000; + */ orr \pte, \phys, \phys, lsr #36 and \pte, \pte, #PTE_ADDR_MASK #else + /* + * IAMROOT20 20230909: + * pte = phys; + */ mov \pte, \phys #endif .endm diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index cf2987464c186..669d1d9b6a2f6 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -80,6 +80,10 @@ * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. */ +/* IAMROOT20 20240706 + * idx가 sz보다 크거나 같으면 0, 작으면 ULONG_MAX 반환 + * sbc : Carry 플래그를 사용하여 빼기 연산 수행 + */ #define array_index_mask_nospec array_index_mask_nospec static inline unsigned long array_index_mask_nospec(unsigned long idx, unsigned long sz) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6bf013fb110d7..bb6ac2edb93c7 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -811,6 +811,14 @@ static inline bool system_has_full_ptr_auth(void) static __always_inline bool system_uses_irq_prio_masking(void) { + /* IAMROOT20 20240427 + * PSEUDO_NMI : daif를 set해도 마스킹이 되지 않는 인터럽트(NMI)를 + * 활성화하기 위해 GIC를 사용하여 인터럽트를 발생시킨다.
+ * GIC : Generic Interrupt Controller + * - 인터럽트를 받아서 분배하는 하드웨어 모듈 + * + * ARM64_HAS_GIC_PRIO_MASKING = 27 + */ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_GIC_PRIO_MASKING); } diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2fe..b4a888b170e88 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -25,6 +25,9 @@ static inline void local_daif_mask(void) (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET))); + /* IAMROOT20 20240511 + * "msr daifset, #0xf" : daif를 모두 masking 해서 인터럽트가 발생하지 않음 + */ asm volatile( "msr daifset, #0xf // local_daif_mask\n" : @@ -42,9 +45,17 @@ static inline unsigned long local_daif_save_flags(void) { unsigned long flags; + /* IAMROOT20 20240427 + * mrs flags, daif + */ flags = read_sysreg(daif); if (system_uses_irq_prio_masking()) { + /* IAMROOT20 20240427 + * ICC_PMR_EL1 : 특정 인터럽트 필터보다 더 높은 수준의 + * 인터럽트가 발생했을 때만 core에게 신호를 보낸다. 
+ */ + /* IAMROOT20_END 20240427 */ /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT | PSR_F_BIT; @@ -59,6 +70,7 @@ static inline unsigned long local_daif_save(void) flags = local_daif_save_flags(); + /* IAMROOT20_START 20240511 */ local_daif_mask(); return flags; @@ -114,6 +126,9 @@ static inline void local_daif_restore(unsigned long flags) gic_write_pmr(pmr); } + /* IAMROOT20 20240511 + * msr daif, flags + */ write_sysreg(flags, daif); if (irq_disabled) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c8..cd5172db1a242 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -197,6 +197,22 @@ .endm #ifndef __KVM_NVHE_HYPERVISOR__ +/* IAMROOT20 20231223 + * bool check_override() + * { + * u64 val = IdReg[fld:fld+width]; + * if(val == 0) + * return false; + * + * val = IdReg_override.val; + * u64 mask = IdReg_override.mask; + * if(mask == 0) + * return true; + * if(val & mask) + * return true; + * return false; + * } + */ // This will clobber tmp1 and tmp2, and expect tmp1 to contain // the id register value as read from the HW .macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2 @@ -234,6 +250,12 @@ .endm #endif +/* IAMROOT20 20231223 + * override된 필드를 확인하여, 각 필드와 연결된 레지스터의 값을 업데이트 한다. + * SVE, SME 관련 작업들이 주로 업데이트 됨. + * - SVE, SME와 관련된 작업을 할 경우, Trap 시키지 않음. + * - 최대 크기의 벡터를 설정함. 
+ */ .macro finalise_el2_state check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 58c294a966768..ca0c21e167c9d 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -67,12 +67,22 @@ enum fixed_addresses { FIX_ENTRY_TRAMP_TEXT1, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + /* + * IAMROOT20 20231130: + * __end_of_permanent_fixed_addresses 523 + */ __end_of_permanent_fixed_addresses, /* * Temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. */ + + /* IAMROOT20 20240217 + * NR_FIX_BTMAPS = 0x40000 / 2^12 = 64 + * TOTAL_FIX_BTMAPS = 448 + * FIX_BTMAP_BEGIN = 523 + 447 = 970 + */ #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) #define FIX_BTMAPS_SLOTS 7 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) @@ -89,9 +99,20 @@ enum fixed_addresses { FIX_PUD, FIX_PGD, + /* + * IAMROOT20 20231130: + * __end_of_fixed_addresses 975 + */ __end_of_fixed_addresses }; +/* + * IAMROOT20 20231130: + * FIXADDR_SIZE 0x0020_b000 SZ_523_PAGE + * FIXADDR_START 0xffff_fbff_fddf_5000 + * FIXADDR_TOT_SIZE 0x003c_f000 SZ_975_PAGE + * FIXADDR_TOT_START 0xffff_fbff_fdc3_1000 + */ #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index e0f5f6b73edd7..e6864a2d999d6 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -56,6 +56,9 @@ static inline void arch_local_irq_enable(void) } } +/* IAMROOT20 20240120 + msr daifset, #3 명령어로 irq disable + */ static __always_inline void __daif_local_irq_disable(void) { barrier(); @@ -75,6 +78,13 @@ static __always_inline void __pmr_local_irq_disable(void)
barrier(); } +/* IAMROOT20 20240120 + arch로 시작하는 함수이므로 arch/arm64 경로로 찾아옴 + gicv3의 PMR 기능을 사용할 경우 __pmr_local_irq_disable 함수를 호출하고 + 아닐 경우 __daif_local_irq_disable 함수를 호출 + PMR, PSEUDO_NMI 관련 내용은 추후에 다시 알아보기로 하고 daif로 따라감 + https://github.com/iamroot18/5.10/blob/i515/arch/arm64/include/asm/ptrace.h + */ static inline void arch_local_irq_disable(void) { if (__irqflags_uses_pmr()) { diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index fd258fa4c6288..4af95d1dec9b3 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -63,6 +63,9 @@ #define EARLY_KASLR (0) #endif +/* IAMROOT20 20231209 + * vstart ~ vend 에서 shift 크기가 몇 개 들어갈 수 있는지를 구함 + */ #define SPAN_NR_ENTRIES(vstart, vend, shift) \ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) @@ -143,6 +146,13 @@ * has a direct correspondence, and needs to appear sufficiently aligned * in the virtual address space. */ +/* IAMROOT20 20240330 + * ex) 4k page 기준 + * ARM64_MEMSTART_SHIFT = 30 + * SECTION_SIZE_BITS = 27 + * ARM64_MEMSTART_SHIFT > SECTION_SIZE_BITS 이므로 + * ARM64_MEMSTART_ALIGN = 0x0000_0000_4000_0000(1GB) + */ #if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS #define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) #else diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b1..e22e02a7b1bef 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -30,6 +30,12 @@ * keep a constant PAGE_OFFSET and "fallback" to using the higher end * of the VMEMMAP where 52-bit support is not available in hardware. */ + /* + * IAMROOT20 20231129: + * exam) VA_BITS 48, VA_BITS_MIN 48 + * VMEMMAP_SHIFT 6 + * VMEMMAP_SIZE 0x0200_0000_0000 SZ_2T + */ #define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT) @@ -40,6 +46,21 @@ * KIMAGE_VADDR - the virtual address of the start of the kernel image. 
* VA_BITS - the maximum number of bits for virtual addresses. */ + /* + * IAMROOT20 20231129: + * VA_BITS 48 + * VA_BITS_MIN 48 + * PAGE_OFFSET 0xffff_0000_0000_0000 + * KIMAGE_VADDR 0xffff_8000_0800_0000 + * MODULES_END 0xffff_8000_0800_0000 + * MODULES_VADDR 0xffff_8000_0000_0000 + * MODULES_VSIZE 0x0800_0000 SZ_128M + * VMEMMAP_START 0xffff_fc00_0000_0000 + * VMEMMAP_END 0xffff_fe00_0000_0000 + * PCI_IO_END 0xffff_fbff_ff80_0000 + * PCI_IO_START 0xffff_fbff_fe80_0000 + * FIXADDR_TOP 0xffff_fbff_fe00_0000 + */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define _PAGE_OFFSET(va) (-(UL(1) << (va))) #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) @@ -59,6 +80,10 @@ #define VA_BITS_MIN (VA_BITS) #endif +/* IAMROOT20 20240426 + * exam) va : 48 + * _PAGE_END(48) 0xffff800000000000 (-0x800000000000) + */ #define _PAGE_END(va) (-(UL(1) << ((va) - 1))) #define KERNEL_START _text @@ -77,9 +102,16 @@ #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 +/* + * IAMROOT20 20231202: + * PAGE_END 0xffff_8000_0000_0000 + */ #define PAGE_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ +/* IAMROOT20 20240106 + * MIN_THREAD_SHIFT 14 or 15(CONFIG_KASAN_GENERIC) + */ #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* @@ -89,6 +121,10 @@ #if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) #define THREAD_SHIFT PAGE_SHIFT #else +/* IAMROOT20 20240106 + * exam) PAGE_SHIFT 12 + * THREAD_SHIFT 14 or 15 + */ #define THREAD_SHIFT MIN_THREAD_SHIFT #endif @@ -96,6 +132,11 @@ #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) #endif +/* IAMROOT20 20240106 + * + * exam) PAGE_SHIFT 12 일경우 + * THREAD_SIZE 16k or 32k + */ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) /* @@ -204,6 +245,9 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +/* IAMROOT20 20240217 + * kaslr_offset이 2MB 이상이면 kaslr이 enable되었다고 판단함 + */ static inline bool kaslr_enabled(void) { /* @@ -285,6 +329,12 @@ static inline const void *__tag_set(const void *addr, u8
tag) * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the * kernel's TTBR1 address range. */ + /* + * IAMROOT20 20231202: + * __is_lm_address(addr) => PAGE_OFFSET <= addr < PAGE_END + * exam) VA_BITS : 48 + * 0xffff_0000_0000_0000 ~ 0xffff_8000_0000_0000 + */ #define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) #define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET) @@ -335,6 +385,10 @@ static inline void *phys_to_virt(phys_addr_t x) * Drivers should NOT use these either. */ #define __pa(x) __virt_to_phys((unsigned long)(x)) + /* + * IAMROOT20 20231202: + * __pa_symbol(x) -> (x - kimage_voffset) + */ #define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) #define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 56911691bef05..531df05da191a 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -53,6 +53,11 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); cpu_set_reserved_ttbr0(); + /* IAMROOT20 20240427 + * cpu_do_switch_mm() + * - ttbr0를 pgd로 설정한다. + * - pgd, ttbr1의 ASID 필드에 mm의 asid를 설정한다. 
+ */ cpu_do_switch_mm(virt_to_phys(pgd),mm); } @@ -96,12 +101,29 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) */ static inline void cpu_uninstall_idmap(void) { + /* IAMROOT20 20240330 + * current는 init_task (head.S에서 sp_el0에 init_task 주소를 적어놨음) + * current->active_mm은 init_mm을 가리킴 (현재 init_mm.pgd는 init_pg_dir이 들어있음) + */ struct mm_struct *mm = current->active_mm; + /* IAMROOT20 20240330 + * reserved_pg_dir의 물리 주소를 ttbr0_el1에 write + * reserved_pg_dir은 아무 정보가 없는 zero page table + */ cpu_set_reserved_ttbr0(); + /* IAMROOT20 20240330 + * tlb invalidation + */ local_flush_tlb_all(); + /* IAMROOT20 20240330 + * tcr_el1.t0sz의 값을 현재 사용중인 va bit로 설정해줌 + */ cpu_set_default_tcr_t0sz(); + /* IAMROOT20 20240330 + * mm과 init_mm이 같으므로 현재 호출되지 않음 + */ if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); } @@ -156,6 +178,9 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) unsigned long daif; /* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */ + /* IAMROOT20 20240427 + * swapper_pg_dir의 phys address를 구해서 ttbr1에 저장 + */ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { @@ -172,6 +197,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); + /* IAMROOT20 20240427 + * idmap을 ttbr0으로 설정하고, + * ttbr0, ttbr1의 ASID를 init_mm의 asid로 설정한다. + */ __cpu_install_idmap(idmap); /* @@ -179,6 +208,11 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) * in the process of being replaced so mask everything. 
*/ daif = local_daif_save(); + /* IAMROOT20 20240511 + * msr ttbr1_el1, ttbr1 + * + * replace_phys는 idmap에 매핑된 주소이다.(ttbr0 사용) + */ replace_phys(ttbr1); local_daif_restore(daif); diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbfb..1be672f578563 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,6 +11,12 @@ #include /* PAGE_SHIFT determines the page size */ +/* IAMROOT20 20231202 + * exam) VA_BITS 48, PAGE_SHIFT 12 + * PAGE_SHIFT 12 + * PAGE_SIZE SZ_4K + * PAGE_MASK 0xffff_ffff_ffff_f000 + */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index b9ba19dbdb694..42a5d99cef233 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -29,6 +29,10 @@ static inline unsigned long __hyp_my_cpu_offset(void) return read_sysreg(tpidr_el2); } +/* IAMROOT20 20240127 + * mrs off, tpidr_el1 + * tpidr_el1값을 가져와서 반환 + */ static inline unsigned long __kern_my_cpu_offset(void) { unsigned long off; @@ -49,6 +53,9 @@ static inline unsigned long __kern_my_cpu_offset(void) #ifdef __KVM_NVHE_HYPERVISOR__ #define __my_cpu_offset __hyp_my_cpu_offset() #else +/* IAMROOT20 20240127 + * tpidr_el1값을 반환 + */ #define __my_cpu_offset __kern_my_cpu_offset() #endif diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f6..b9d255ac9665e 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -22,6 +22,9 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { + /* IAMROOT20 20231202, 20240420 + * (*pudp) = (pmdp | prot) + */ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); } @@ -41,6 +44,9 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) #if CONFIG_PGTABLE_LEVELS > 3 +/** IAMROOT20
20231202 + * *p4dp = (pudp | prot); + */ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); @@ -66,6 +72,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { + /* IAMROOT20 20231209 + * __pmd(__phys_to_pmd_val(ptep) | prot = ptep | prot + * + * *pmdp = ptep | prot + */ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f658aafc47dfa..c6404160f5262 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -46,6 +46,17 @@ * PMD_SHIFT determines the size a level 2 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 2 + /* + * IAMROOT20 20230916: + * exam) 39 VA_BITS, 4k + * PMD_SHIFT 21 + * PMD_SIZE SZ_2M + * exam) 48 VA_BITS, 4k + * PMD_SHIFT 21 + * PMD_SIZE SZ_2M + * PMD_MASK 0xffff_ffff_ffe0_0000 + * PTRS_PER_PMD 512 + */ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -56,6 +67,14 @@ * PUD_SHIFT determines the size a level 1 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 3 + /* + * IAMROOT20 20231201: + * exam) 48 VA_BITS, 4k + * PUD_SHIFT 30 + * PUD_SIZE SZ_1G + * PUD_MASK 0xffff_ffff_c000_0000 + * PTRS_PER_PUD 512 + */ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) @@ -66,6 +85,22 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). 
*/ + /* + * IAMROOT20 20230916: + * exam) VA_BITS==36 on 16k(14bit) CONFIG_PGTABLE_LEVELS = 2 + * PGDIR_SHIFT : 25 + * PGDIR_SIZE : 1 << 25 SZ_32M + * PGDIR_MASK : 0xffff_ffff_fe00_0000 + * PTRS_PER_PGD : 2048 (1 << (36 - 25)) + * exam) VA_BITS == 52 on 64k(16bit) CONFIG_PGTABLE_LEVELS = 3 + * PGDIR_SHIFT : 42 + * PGDIR_SIZE : SZ_4T + * exam) VA_BITS == 48 on 4k CONFIG_PGTABLE_LEVELS = 4 + * PGDIR_SHIFT 39 + * PGDIR_SIZE SZ_512G + * PGDIR_MASK 0xffff_ffc0_0000_0000 + * PTRS_PER_PGD 512 + */ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -74,11 +109,30 @@ /* * Contiguous page definitions. */ +/* IAMROOT20 20231216 + * exam) VA_BITS == 48, 4k + * CONT_PTE_SHIFT 16 + * CONT_PTES 16 + * CONT_PTE_SIZE SZ_64K + * CONT_PTE_MASK 0xffff_ffff_ffff_0000 + */ #define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +/* IAMROOT20 20231209 + * CONFIG_ARM64_CONT_PMD_SHIFT = 4(arm64 defconfig) + * exam) 4K, 4-level + * CONT_PMD_SHIFT = 4 + 21 + * CONT_PMDS = (1 << 4) = 16 + * CONT_PMD_SIZE = 16 * SZ_2M = 32MB + * CONT_PMD_MASK = ~(32M - 1) + */ +/* IAMROOT20 20240706 + * exam) 16K, 4-level + * CONT_PMD_SHIFT = 5 + 25 + */ #define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) @@ -155,12 +209,32 @@ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +/* + * IAMROOT20 20230916: + * CONFIG_ARM64_PA_BITS_52 일경우 + * PAGE_SHIFT 16 + * PTE_ADDR_LOW 0x0000_ffff_ffff_0000 ((1 << (48 - 16)) - 1) << 16 + * CONFIG_ARM64_PA_BITS_48 일경우 + * PAGE_SHIFT 12 + * PTE_ADDR_LOW 0x0000_ffff_ffff_f000 ((1 << (48 - 12)) - 1) << 12 + */ #define PTE_ADDR_LOW (((_AT(pteval_t, 1) <<
(48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 +/* + * IAMROOT20 20230916: + * PTE_ADDR_HIGH 0xf << 12 0xf000 + * PTE_ADDR_MASK 0x0000_ffff_ffff_f000 + */ #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) #define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #define PTE_ADDR_HIGH_SHIFT 36 #else +/* + * IAMROOT20 20231202: + * CONFIG_ARM64_PA_BITS_48 일경우 + * PAGE_SHIFT 12 + * PTE_ADDR_MASK 0x0000_ffff_ffff_f000 + */ #define PTE_ADDR_MASK PTE_ADDR_LOW #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0bd18de9fd97b..b41b92226f021 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -76,11 +76,23 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { + /* IAMROOT20 20231202 + * pte & 0x0000_ffff_ffff_0000 | (pte & 0xf000) << 36 + * exam) + * pte = 0x0000_ABCD_EF01_9000 + * return 0x0009_ABCD_EF01_0000 + */ return (pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); } static inline pteval_t __phys_to_pte_val(phys_addr_t phys) { + /* IAMROOT20 20231202 + * (phys | (phys >> 36)) & 0x0000_ffff_ffff_f000 + * exam) + * phys = 0x0009_ABCD_EF01_0000 + * return 0x0000_ABCD_EF01_9000 + */ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK; } #else @@ -89,6 +101,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #endif #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) +/* IAMROOT20 20231209 + * pfn_pte(pfn,prot) = (pfn << PAGE_SHIFT) | prot + */ #define pfn_pte(pfn,prot) \ __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) @@ -124,6 +139,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) +/* IAMROOT20 20231202 + * return pte & PTE_VALID; + */ #define 
pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) /* * Execute-only user mappings do not have the PTE_USER bit set. All valid @@ -370,6 +388,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ +/* IAMROOT20 20240406 + * ex) VA_BITS : 48, page size : 4KB + * - HPAGE_SHIFT 21 PMD_SHIFT + * - HPAGE_SIZE SZ_2M (1 << 21) + * - HPAGE_MASK 0xffff_ffff_ffe0_0000 ~((1 << 21) - 1) + * - HUGETLB_PAGE_ORDER 9 (21-12) + */ #define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) @@ -381,6 +406,10 @@ static inline pte_t pgd_pte(pgd_t pgd) return __pte(pgd_val(pgd)); } +/* + * IAMROOT20 20231202: + * return p4d + */ static inline pte_t p4d_pte(p4d_t p4d) { return __pte(p4d_val(p4d)); @@ -602,8 +631,14 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } #else +/* IAMROOT20 20231202 + * pud_sect(pud) => (pud & 3) == 1 + */ #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) +/* IAMROOT20 20231202 + * pud_table(pud) => (pud & 3) == 3 + */ #define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_TABLE) #endif @@ -631,7 +666,9 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) return; } #endif /* __PAGETABLE_PMD_FOLDED */ - + /* IAMROOT20 20231209 + * *pmdp = pmd + */ WRITE_ONCE(*pmdp, pmd); if (pmd_valid(pmd)) { @@ -678,6 +715,15 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) +/* IAMROOT20 20231202 + * pud_none(pud) => (pud == 0) + * pud_bad(pud) => ((pud & 3) != 3) + * pud_present(pud) => (pud & (PTE_VALID | PTE_PROT_NONE)) + * pud_leaf(pud) => (pud_present(pud) && !pud_table(pud)) + * pud_valid(pud) => (pud & 1) + * pud_user(pud) => (pud & PTE_USER) + * pud_user_exec(pud) => !(pud & PTE_UXN) + */ #define pud_none(pud)
(!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) @@ -695,6 +741,9 @@ static inline void set_pud(pud_t *pudp, pud_t pud) } #endif /* __PAGETABLE_PUD_FOLDED */ + /* IAMROOT20 20231202 + * *pudp = pud + */ WRITE_ONCE(*pudp, pud); if (pud_valid(pud)) { @@ -749,17 +798,33 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) +/* + * IAMROOT20 20231202: + * p4d_none p4d == 0 + * p4d_bad !(p4d & 2) + * p4d_present p4d != 0 + */ #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) #define p4d_present(p4d) (p4d_val(p4d)) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { + /* IAMROOT20 20240420 + * fixmap_copy() 에서 호출하는 경우, + * swapper_pg_dir을 FIX_PGD에 mapping한 후 이 함수로 들어오기 때문에 + * 아래 in_swapper_pgdir 함수에서 false를 return + */ if (in_swapper_pgdir(p4dp)) { set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); return; } + /* IAMROOT20 20231202 + * *p4dp = p4d; + * dsb ishst + * isb + */ WRITE_ONCE(*p4dp, p4d); dsb(ishst); isb(); @@ -781,6 +846,9 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) } /* Find an entry in the first-level page table. 
*/ +/* IAMROOT20 20231202 + * p4d_page_paddr(*dir) + pud_index(addr) * sizeof(pud_t) + */ #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f2d26235bfb4e..85d3c6958e946 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -31,6 +31,9 @@ #include #include +/* IAMROOT20 20240127 + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); /* @@ -40,6 +43,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * And we can't use this_cpu_ptr() either, as that winds up recursing back * here under CONFIG_DEBUG_PREEMPT=y. */ +/* IAMROOT20 20240127 + * ex) &cpu_number + 0 + */ #define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) /* diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 5f54376210293..7fc1693261d01 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -5,6 +5,9 @@ #ifndef __ASM_SPARSEMEM_H #define __ASM_SPARSEMEM_H +/* IAMROOT20 20240809 + * MAX_PHYSMEM_BITS 48 + */ #define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index eefd712f24303..e4f9a34c4ef5a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -585,10 +585,36 @@ #define ENDIAN_SET_EL1 0 #endif + +/* IAMROOT20 20231007 + * LSMAOE : Multiple Load/Store(AArch32) 명령어로 메모리 접근 시 인터럽트 허용 여부 설정 + * nTLSMD : Multiple Load/Store(AArch32) 명령어로 device 접근 시 fault 여부 설정 + * EIS : EL1의 context synchronizing event 여부에 따라 exception 설정 + * TSCXT : EL0의 SCXTNUM_EL0 접근 허용 여부 설정 + * EOS : EL1에서 반환되는 exception의 context synchronizing event 여부에 따라 비트 설정 +*/ #define 
INIT_SCTLR_EL1_MMU_OFF \ (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \ SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS) +/* IAMROOT20 20231007 + * M : 1단계 주소 변환(EL0/EL1)에 대한 MMU 활성화 여부 설정 + * C : EL0/EL1에서 노멀 메모리에 접근되는 모든 데이터, + * 혹은 1단계 변환 테이블에 접근되는 노멀 메모리에 대해 Cacheability 여부 설정 + * SA : EL1의 Stack Pointer 레지스터 얼라인먼트 체크 설정 + * SA0 : EL0의 Stack Pointer 레지스터 얼라인먼트 체크 설정 + * SED : AArch32의 EL0에서 SETEND 명령어 실행 허용 여부 설정 + * I : EL0/EL1에서 노멀 메모리에 접근하는 모든 명령어의 Cacheability 여부 설정 + * DZE : EL0에서 캐시를 제어하는 'DC ZVA' 명령어의 실행 허용 여부 설정 + * UCT : EL0에서 캐시의 동작을 설정하는 CTR_EL0 레지스터 접근 여부 설정 + * nTWI : EL0에서 WFI 명령어 실행 허용 여부 설정 + * IESB : Implicit Error Synchronization event 허용 여부 설정 + * SPAN : EL1에서 exception 발생 시 Privileged Access Never 활성화 여부 설정 + * ITFSB : Tag Check Fault의 synchronized 여부 설정 + * UCI : EL0에서 캐시를 설정하는 명령어 + * (DC CVAU, DC CIVAC, DC CVAC, DC CVAP, IC IVAU) 실행 허용 여부 설정 + * EPAN : Privileged Access Never 강화 여부 설정 +*/ #define INIT_SCTLR_EL1_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \ @@ -838,6 +864,11 @@ * For registers without architectural names, or simply unsupported by * GAS. */ +/* IAMROOT20 20240106 + * r = SYS_MPIDR_EL1 + * mrs_s __val, SYS_MPIDR_EL1 + * return __val; + */ #define read_sysreg_s(r) ({ \ u64 __val; \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de82..e00b0a7423c39 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -21,6 +21,10 @@ struct task_struct; /* * low level task data that entry.S needs immediate access to. */ +/* IAMROOT20 20231111 + * preempt_count: 프로세스의 컨텍스트 실행 정보와 프로세스가 선점 스케줄링 될 정보를 저장. 
+ * - 0이면 선점 가능, 0 미만이면 bug + */ struct thread_info { unsigned long flags; /* low level flags */ #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7d7128c651614..44803f8e664af 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1620,6 +1620,10 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) * state once the SMP CPUs are up and thus make the switch to non-global * mappings if required. */ +/* IAMROOT20 20240217 + * arm64 e0pd 기능을 사용할 수 있으면 kpti는 필요없다고 판단하여 false 반환 + * e0pd 기능을 사용할 수 없으면 kaslr이 켜져 있는지만 확인하여 true 반환 + */ bool kaslr_requires_kpti(void) { if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) @@ -1629,6 +1633,13 @@ bool kaslr_requires_kpti(void) * E0PD does a similar job to KPTI so can be used instead * where available. */ + /* IAMROOT20_END 20240203 */ + /* IAMROOT20_START 20240217 */ + /* IAMROOT20 20240217 + * ID_AA64MMFR2_EL1.E0PD를 읽어와서 E0PD 기능을 지원하는지 확인 + * 만약 지원하면 false를 반환 + * 만약 지원하지 않으면 계속 진행 + */ if (IS_ENABLED(CONFIG_ARM64_E0PD)) { u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); if (cpuid_feature_extract_unsigned_field(mmfr2, @@ -1640,6 +1651,9 @@ bool kaslr_requires_kpti(void) * Systems affected by Cavium erratum 24756 are incompatible * with KPTI. */ + /* IAMROOT20 20240217 + * 특정 벤더사에 대한 예외 상황으로 넘어감 + */ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { extern const struct midr_range cavium_erratum_27456_cpus[]; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ab2a6e33c0528..5d1fd2b1f7bf7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -505,6 +505,14 @@ tsk .req x28 // current thread_info /* * Exception vectors. 
+ */ +/* IAMROOT20 20231118 + * kernel_ventry el, ht, regsize, label + * el : exception level + * ht : sp0 공유 여부 + * - t(thread) : sp0 공유 + * - h(handler): sp0 공유하지 않음 + * label : exception 라벨 */ .pushsection ".entry.text", "ax" diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4936d8eace3b4..7620b5a345383 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,11 @@ SYM_CODE_START(primary_entry) * of the primary boot code to the PoC so we can safely execute it with * the MMU off. */ + /* + * IAMROOT20 20230909: + * MMU가 ON 상태이면 __idmap_text_start부터 __idmap_text_end 까지 + * cache clean 한다. + */ cbz x19, 0f adrp x0, __idmap_text_start adr_l x1, __idmap_text_end @@ -465,6 +470,10 @@ SYM_FUNC_START_LOCAL(remap_region) // Get the index offset for the start of the last level table lsr x1, x1, x6 + /* + * IAMROOT20 20230909: + * bic x1, x1, 0x1ff // PAGE_SHIFT = 12일 경우. + */ bfi x1, xzr, #0, #PAGE_SHIFT - 3 // Derive the start and end indexes into the last level table @@ -508,6 +517,15 @@ SYM_FUNC_START_LOCAL(create_idmap) * requires more than 47 or 48 bits, respectively. */ #if (VA_BITS < 48) + /* + * IAMROOT20 20230916: + * exam) VA_BITS=47 on 16k(14bit) + * IDMAP_PGD_ORDER = (47-36) = 11 + * EXTRA_SHIFT = (36 + 14 - 3) = 47 + * VA_BITS=39 on 4k(12bit) + * IDMAP_PGD_ORDER = 30 + * EXTRA_SHIFT = 39 + */ #define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) @@ -525,6 +543,8 @@ SYM_FUNC_START_LOCAL(create_idmap) /* IAMROOT20 20230805 * PGDIR_SHIFT : PGD의 인덱스를 가져오기 위해 shift 해야 하는 횟수. * IDMAP_PGD_ORDER : PGD의 사이즈를 의미한다. 
+ * exam) 9 : 48bit, 4k page : 48-39 + * exam) 10: 52bit, 64k : 52 - 42 */ #define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) #define EXTRA_SHIFT @@ -559,7 +579,13 @@ SYM_FUNC_START_LOCAL(create_idmap) bl remap_region /* IAMROOT20_END 20230826 */ + /* IAMROOT20_START 20230909 */ /* Remap the FDT after the kernel image */ + /* IAMROOT20 20230909 + * 커널의 끝 주소를 2MB를 더해준 다음, 내림 연산을 하여 2MB 단위로 정렬한다. + * 매핑한 FDT의 물리 주소를 2MB 단위로 정렬한다. + * FDT의 영역을 4MB 크기로 잡아서, 물리 주소에 RW 속성으로 매핑한다. + */ adrp x1, _text adrp x22, _end + SWAPPER_BLOCK_SIZE bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 @@ -578,13 +604,18 @@ SYM_FUNC_START_LOCAL(create_idmap) cbnz x19, 0f // skip cache invalidation if MMU is on dmb sy + /* IAMROOT20_START 20230916 */ adrp x0, init_idmap_pg_dir adrp x1, init_idmap_pg_end bl dcache_inval_poc 0: ret x28 + /* IAMROOT20_END 20230916 */ SYM_FUNC_END(create_idmap) SYM_FUNC_START_LOCAL(create_kernel_mapping) + /* IAMROOT20 20231028 + * init_pg_dir에 대해 page table을 생성 + */ adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR // compile time __va(_text) #ifdef CONFIG_RELOCATABLE @@ -612,7 +643,15 @@ SYM_FUNC_END(create_kernel_mapping) */ .macro init_cpu_task tsk, tmp1, tmp2 msr sp_el0, \tsk - + + /* IAMROOT20 20231111 + * tmp1 = tsk->stack + * TSK_STACK = 32 + * THREAD_SIZE = 16k + * PT_REGS_SIZE = 336 + * S_STACKFRAME = 304 + */ + /* IAMROOT20_END 20231111 */ /* IAMROOT20_START 20231118 */ ldr \tmp1, [\tsk, #TSK_STACK] add sp, \tmp1, #THREAD_SIZE sub sp, sp, #PT_REGS_SIZE @@ -622,6 +661,12 @@ SYM_FUNC_END(create_kernel_mapping) scs_load_current + /* IAMROOT20 20231118 + * tmp1 = __per_cpu_offset + * tmp2 = tsk.thread_info.cpu + * tmp1 = __per_cpu_offset[tmp2] + * tpidr_el1 = tmp1 + */ adr_l \tmp1, __per_cpu_offset ldr w\tmp2, [\tsk, #TSK_TI_CPU] ldr \tmp1, [\tmp1, \tmp2, lsl #3] @@ -633,6 +678,7 @@ SYM_FUNC_END(create_kernel_mapping) * * x0 = __pa(KERNEL_START) */ +/* IAMROOT20_START 20231111 */ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x4, init_task init_cpu_task x4, x5, x6 @@ 
-641,11 +687,20 @@ SYM_FUNC_START_LOCAL(__primary_switched) msr vbar_el1, x8 // vector table address isb +/* IAMROOT20 20231118 + * x29 : stack frame + * x30 : link register + */ stp x29, x30, [sp, #-16]! mov x29, sp str_l x21, __fdt_pointer, x5 // Save FDT pointer +/* IAMROOT20 20231118 + * kimage_vaddr : 커널 이미지의 가상 주소의 시작 (0xffff800008000000) + */ +/* IAMROOT20_END 20231118 */ +/* IAMROOT20_START 20231125 */ ldr_l x4, kimage_vaddr // Save the offset between sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings @@ -654,13 +709,24 @@ SYM_FUNC_START_LOCAL(__primary_switched) bl set_cpu_boot_mode_flag // Clear BSS + /* IAMROOT20 20231125 + * bss 영역을 0으로 초기화 (init_pg_dir 아래에 위치) + */ adr_l x0, __bss_start mov x1, xzr adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + /* IAMROOT20 20231125 + * ISHST : 스토어가 마무리되기를 기다리는 동작 (Store - Store) + * domain : Inner Shareable domain + */ dsb ishst // Make zero page visible to PTW +/* IAMROOT20 20231125 + * 가상주소 52bit를 지원하면 vabits_actual = 52 + * 지원하지 않는다면 vabits_actual = 48 + */ #if VA_BITS > 48 adr_l x8, vabits_actual // Set this early so KASAN early init str x25, [x8] // ... observes the correct value @@ -708,11 +774,13 @@ SYM_FUNC_END(__primary_switched) * * x0: whether we are being called from the primary boot path with the MMU on */ +/* IAMROOT20_START 20231007 */ SYM_FUNC_START(init_kernel_el) mrs x1, CurrentEL cmp x1, #CurrentEL_EL2 b.eq init_el2 +/* IAMROOT20_END 20231007 */ /* IAMROOT20_START 20231014 */ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) mov_q x0, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround @@ -752,6 +820,9 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) * making it impossible to start in nVHE mode. Is that * compliant with the architecture? Absolutely not! */ + /* IAMROOT20 20231014 + * 일부 CPU의 경우 E2H설정이 1로 고정되어 nVHE로 변경할 수 없다. + */ mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 1f @@ -855,8 +926,19 @@ SYM_FUNC_END(__secondary_too_slow) * in w0. 
See arch/arm64/include/asm/virt.h for more info. */ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) + /* IAMROOT20 20231125 + * EL2일 경우 + * u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL2 }; + * x1 x1 + * EL1일 경우 + * u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL1, BOOT_CPU_MODE_EL1 }; + * x1 + */ adr_l x1, __boot_cpu_mode cmp w0, #BOOT_CPU_MODE_EL2 + /*IAMROOT20 20231125 + * EL1인 경우 label 1로 이동 + */ b.ne 1f add x1, x1, #4 1: str w0, [x1] // Save CPU boot mode @@ -898,12 +980,18 @@ SYM_FUNC_END(set_cpu_boot_mode_flag) SYM_FUNC_START(__enable_mmu) mrs x3, ID_AA64MMFR0_EL1 ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4 + /* IAMROOT20 20231021 + * MIN : 0x0, MAX : 0x7 + */ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN b.lt __no_granule_support cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX b.gt __no_granule_support phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 + /* IAMROOT20 20231028 + * 임시로 reserved_pg_dir(내용이 모두 0인 tlb)을 ttbr1에 load함 + */ load_ttbr1 x1, x1, x3 set_sctlr_el1 x0 @@ -952,6 +1040,12 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset + /* IAMROOT20 20231104 + * relocation table을 참조하여 R_AARCH64_RELATIVE type을 가진 모든 entry들의 + * offset + kaslr displacement 주소의 값을 addend + kaslr displacement으로 설정함 + * relocation table의 offset은 구조체 변수의 필드 위치를 가리키는 것으로 보임 + * relocation table의 addend는 심볼 주소로 보임 + */ 0: cmp x9, x10 b.hs 1f ldp x12, x13, [x9], #24 @@ -997,6 +1091,16 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) adr_l x9, __relr_start adr_l x10, __relr_end + /* IAMROOT20 20231104 + * relative relocation 과정 + * 배경: 기존의 relocation table에서 R_AARCH64_RELATIVE type이 많은 부분을 차지하고 있음 + * R_AARCH64_RELATIVE의 특징은 offset이 8bytes 크기 차이로 모여있음 + * RELA relocation의 info, type을 제거하고 offset을 압축해서 relr section을 생성 + * relative relocation entry는 address와 bitmap으로 나누어짐. entry 값이 짝수이면 address를, 홀수이면 bitmap을 의미함. 
+ * 짝수 값 entry는 맨 처음 1개의 relocation을 담당하고 (label 2) + * bitmap은 다음부터 8bytes씩 증가되는 63개의 relocation을 진행하며 bitmap LSB 값 1은 무시 (label 3, 4, 5, 6) + * rela relocation에서 진행했던 offset + kaslr_displacement / addend + kaslr_displacement 변경 과정은 동일 + */ 2: cmp x9, x10 b.hs 7f ldr x11, [x9], #8 @@ -1041,9 +1145,17 @@ SYM_FUNC_START_LOCAL(__primary_switch) and x23, x23, MIN_KIMG_ALIGN - 1 #ifdef CONFIG_RANDOMIZE_BASE mov x0, x22 + /* IAMROOT20 20231028 + * sp를 설정하여 C 함수를 호출할 수 있도록 함 + */ adrp x1, init_pg_end mov sp, x1 mov x29, xzr + /* IAMROOT20 20231028 + * RANDOMIZE_BASE가 Enable되어 있는 경우 + * 커널 이미지를 재배치 할 offset을 구함 + * KASLR(Kernel Address Space Layout Randomization) + */ bl __pi_kaslr_early_init and x24, x0, #SZ_2M - 1 // capture memstart offset seed bic x0, x0, #SZ_2M - 1 @@ -1053,12 +1165,26 @@ SYM_FUNC_START_LOCAL(__primary_switch) bl clear_page_tables bl create_kernel_mapping + /* IAMROOT20 20231028 + * __enable_mmu에서 임시로 reserved_pg_dir을 load했었는데, + * ttbr1에 init_pg_dir을 load함 + */ adrp x1, init_pg_dir load_ttbr1 x1, x1, x2 + /* IAMROOT20_END 20231028 */ + /* IAMROOT20_START 20231104 */ #ifdef CONFIG_RELOCATABLE bl __relocate_kernel #endif + /* IAMROOT20 20231104 + * ldr x8, =__primary_switched는 pseudo instruction(literal pool)으로 + * 빌드 타임의 __primary_switched 가상 주소를 해당 명령어 주변에 셋팅함 + * 런 타임에 해당 라인이 실행되면 x8에 __primary_switched의 가상 주소를 저장하고 + * x0에 커널 이미지의 시작 물리 주소를 들고 + * x8 주소로 branch하면 TTBR1_EL1을 base 주소로 하는 init_pg_dir 페이지 테이블을 타고 커널 가상 주소 영역으로 진입 + */ ldr x8, =__primary_switched adrp x0, KERNEL_START // __pa(KERNEL_START) br x8 + /* IAMROOT20_END 20231104 */ SYM_FUNC_END(__primary_switch) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9439240c3fcf3..512117b8eb1d8 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -46,6 +46,9 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 SYM_CODE_START_LOCAL(elx_sync) + /* IAMROOT20 20240106 + * x0 = #HVC_FINALISE_EL2 + */ cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ 
-73,6 +76,7 @@ SYM_CODE_START_LOCAL(elx_sync) eret SYM_CODE_END(elx_sync) +/* IAMROOT20_END 20231223 */ SYM_CODE_START_LOCAL(__finalise_el2) finalise_el2_state @@ -139,6 +143,9 @@ SYM_CODE_END(__finalise_el2) .pushsection .idmap.text, "ax" SYM_CODE_START_LOCAL(enter_vhe) + /* IAMROOT20 20230106 + * TLB invalidate by VMID(Virtual Machine ID ), All at stage 1, EL1. + */ // Invalidate TLBs before enabling the MMU tlbi vmalle1 dsb nsh @@ -215,6 +222,7 @@ SYM_FUNC_END(__hyp_reset_vectors) * * w0: boot mode, as returned by init_kernel_el() */ +/* IAMROOT20_START 20240106 */ SYM_FUNC_START(finalise_el2) // Need to have booted at EL2 cmp w0, #BOOT_CPU_MODE_EL2 diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 370ab84fd06e2..0321bc07793ad 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -185,37 +185,71 @@ static int __init parse_nokaslr(char *unused) } early_param("nokaslr", parse_nokaslr); +/* IAMROOT20 20231216 + * return 값이 0이면 성공적으로 찾음. 그외의 값이면 실패. 
+ */ static int __init find_field(const char *cmdline, const struct ftr_set_desc *reg, int f, u64 *v) { char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; int len; + /* IAMROOT20 20231216 + * exam) cmdline = "kaslr.disabled=1" + * reg->name = "kaslr" + * reg->fields[0].name = "disabled" + * ==> opt = "kaslr.disabled=" + * ==> len = 15 + * + * ==> kstrtou64("1", 0, v); + */ len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=", reg->name, reg->fields[f].name); if (!parameqn(cmdline, opt, len)) return -1; + /* IAMROOT20 20231223 + * exam) cmdline = "kaslr.disabled=1" + * cmdline + len = "1" + * return : 0, v = 1 + */ return kstrtou64(cmdline + len, 0, v); } +/* IAMROOT20_START 20231223 + * exam) cmdline="kaslr.disabled=1" + */ static void __init match_options(const char *cmdline) { int i; for (i = 0; i < ARRAY_SIZE(regs); i++) { int f; - + /* IAMROOT20 20231223 + * exam) kaslr -> regs[6] + */ if (!regs[i]->override) continue; for (f = 0; strlen(regs[i]->fields[f].name); f++) { + /* IAMROOT20 20231223 + * exam) kaslr.fields[0] = { "disabled", 0, 4, NULL} + * shift = 0; + * width = 4; + * mask = GENMASK_ULL(3, 0) -> 0b1111 -> 0xf + */ u64 shift = regs[i]->fields[f].shift; u64 width = regs[i]->fields[f].width ?: 4; u64 mask = GENMASK_ULL(shift + width - 1, shift); u64 v; + /* IAMROOT20 20231223 + * exam) cmdline = "kaslr.disabled=1" + * regs = &kaslr, f = 0 + * + * return : 0, v = 1 + */ if (find_field(cmdline, regs[i], f, &v)) continue; @@ -224,6 +258,10 @@ static void __init match_options(const char *cmdline) * it by setting the value to the all-ones while * clearing the mask... Yes, this is fragile. */ + /* IAMROOT20 20231223 + * filter에서 실패가 나오면 mask값은 0으로 설정하고 + * val값은 모두 1로 셋팅하여 invalid value임을 표시한다. 
+ */ if (regs[i]->fields[f].filter && !regs[i]->fields[f].filter(v)) { regs[i]->override->val |= mask; @@ -231,9 +269,18 @@ static void __init match_options(const char *cmdline) continue; } + /* IAMROOT20 20231216 + * exam) kaslr.override->val = 0, kaslr.override->mask = 0 + * v = 1 + * mask = 0x0f + * regs[i]->override->val = 0 + * regs[i]->override->val |= (1 << 0) & 0x0f + * regs[i]->override->mask |= 0x0f + */ regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; + /* IAMROOT20_END 20231216 */ return; } @@ -264,6 +311,10 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) match_options(buf); + /* IAMROOT20 20231223 + * exam) buf = "nokaslr" 일경우 + * aliases[i].feature = "kaslr.disabled=1" + */ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++) if (parameq(buf, aliases[i].alias)) __parse_cmdline(aliases[i].feature, false); @@ -293,6 +344,13 @@ static __init const u8 *get_bootargs_cmdline(void) static __init void parse_cmdline(void) { + /* IAMROOT20 20231216 + * \ { + * chosen { + * bootargs = "console=ttyS1,115200 earlyprintk"; + * } + * }; + */ const u8 *prop = get_bootargs_cmdline(); if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 17bff6e399e46..4e26f43a2a076 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -88,11 +88,21 @@ static u64 get_kaslr_seed(void *fdt) asmlinkage u64 kaslr_early_init(void *fdt) { u64 seed; - + + /* IAMROOT20 20231028 + * cmdline에서 kaslr이 disable되어 있는지 확인 + */ if (is_kaslr_disabled_cmdline(fdt)) return 0; + /* IAMROOT20 20231028 + * DT에서 kaslr seed 값을 read + */ seed = get_kaslr_seed(fdt); + /* IAMROOT20 20231028 + * DT에 seed 값이 존재하지 않으면, + * RNDR 레지스터를 읽어 seed를 얻음 + */ if (!seed) { if (!__early_cpu_has_rndr() || !__arm64_rndr((unsigned long *)&seed)) @@ -106,5 +116,8 @@ asmlinkage u64 kaslr_early_init(void *fdt) * 
the lower and upper quarters to avoid colliding with other * allocations. */ + /* IAMROOT20 20231028 + * seed, VA_BITS_MIN을 이용하여 kaslr offset을 return + */ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cbe..3129a441d0121 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -88,6 +88,11 @@ u64 __cacheline_aligned boot_args[4]; void __init smp_setup_processor_id(void) { + /* IAMROOT20 20240106 + * mpidr = MPIDR_EL1 & 0xff00ffffff + * mpidr Affinity level 0~3 정보를 가져옴. + * http://jake.dothome.co.kr/smp_setup_processor_id + */ u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; set_cpu_logical_map(0, mpidr); @@ -176,7 +181,10 @@ void __init *get_early_fdt_ptr(void) asmlinkage void __init early_fdt_map(u64 dt_phys) { int fdt_size; - + + /* IAMROOT20 20231209 + * fixmap 영역을 init_pg_dir, bm_pud, bm_pmd, bm_pte를 이용하여 매핑한다. + */ early_fixmap_init(); early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL); } @@ -206,14 +214,23 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } + /* IAMROOT20_20240302 + * 가상 메모리의 fdt 영역을 read-only로 remapping + */ /* Early fixups are done, map the FDT as read-only now */ fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + /* IAMROOT20_20240302 + * dtb root 노드에서 model 프로퍼티의 문자열을 name에 가져옴 + */ name = of_flat_dt_get_machine_name(); if (!name) return; pr_info("Machine model: %s\n", name); + /* IAMROOT20_20240302 + * 디버깅 로그를 위해 dump_stack_arch_desc_str 전역 변수에 앞에서 가져온 name을 저장해 놓음 + */ dump_stack_set_arch_desc("%s (DT)", name); } @@ -283,6 +300,9 @@ static int __init reserve_memblock_reserved_regions(void) } arch_initcall(reserve_memblock_reserved_regions); +/* IAMROOT20 20240113 + * __cpu_logical_map[0] = 0(mpidr & 0xff00ffffff) + */ u64 __cpu_logical_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = INVALID_HWID }; u64 cpu_logical_map(unsigned int cpu) @@ -303,9 +323,16 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) */ arm64_use_ng_mappings = kaslr_requires_kpti(); + /* + * IAMROOT20 20240217 + * fixmap으로 각 페이지 테이블의 물리 주소에 접근할 수 있게 된다. + * head.S에서 이미 한 번 호출이 되었기 때문에, 아래 호출에선 페이지 테이블 주소가 + * 잘 설정되어 있는지만 확인하는 것 같다. + */ early_fixmap_init(); early_ioremap_init(); + /* IAMROOT20_END 20240217 */ setup_machine_fdt(__fdt_pointer); /* @@ -324,13 +351,21 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); + /* IAMROOT20_20240323 END */ /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ + /* IAMROOT20_START 20240330 */ cpu_uninstall_idmap(); + /* IAMROOT20 20240330 + * xen_early_init은 분석하지 않음 + */ xen_early_init(); + /* IAMROOT20 20240330 + * efi_init은 분석하지 않음 + */ efi_init(); if (!efi_enabled(EFI_BOOT)) { @@ -341,9 +376,14 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) } arm64_memblock_init(); + /* IAMROOT20_END 20240413 */ + /* IAMROOT20_START 20240420 */ paging_init(); + /* IAMROOT20 20240511 + * acpi는 분석하지 않음 + */ acpi_table_upgrade(); /* Parse the ACPI tables for possible boot-time configuration */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d00d4cbb31b16..87d731eca088d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -53,6 +53,9 @@ #include +/* IAMROOT20 20240127 + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 5d68510ca3fb0..e39fbd29f5e42 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -195,6 +195,12 @@ SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) * - end - 
virtual end address of region */ SYM_FUNC_START(__pi_dcache_clean_poc) +/* + * IAMROOT20 20230909: + * + * dcache_line_size x2, x3 ; x2 = 64 + * dcache_by_myline_op cvac, sy, x0, x1, x2, x3 + */ dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END(__pi_dcache_clean_poc) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e1e0dca018392..8ae303cac226e 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -356,6 +356,14 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) if (system_supports_cnp() && asid) ttbr0 |= TTBR_CNP_BIT; + /* IAMROOT20 20240427 + * TTBR_ASID_MASK = 0xffff << 48 + * __bf_shf(TTBR_ASID_MASK) = 48 + * + * FIELD_PREP(TTBR_ASID_MASK, asid) = (asid << 48 ) & (0xffff << 48) + * + * TTBR의 ASID 필드의 mask와 asid를 AND 연산한 값을 가져온다. + */ /* SW PAN needs a copy of the ASID in TTBR0 for entry */ if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN)) ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid); diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index c0a3301203bdf..cef0cdda4116b 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -23,17 +23,30 @@ static_assert(NR_BM_PMD_TABLES == 1); +/* IAMROOT20 20231216 + * FIX_PMD vaddr : 0xfffffbfffdc34000, shift : 21 + * (0xfffffbfffdc34000 >> 21) - (0xffff_fbff_fdc3_1000 >> 21) = 0 + */ #define __BM_TABLE_IDX(addr, shift) \ (((addr) >> (shift)) - (FIXADDR_TOT_START >> (shift))) - +/* IAMROOT20 20231209 + * BM_PTE 테이블에서 addr이 가리키는 index를 찾는다 + */ #define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT) - +/* IAMROOT20 20231209 + * exam) 4KB / 4 level + * -> bm_pte[2][512] + */ static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static inline pte_t *fixmap_pte(unsigned long addr) { + /* IAMROOT20 20231216 + * idx = FIX_PMD 일 경우 + * addr = 0xfffffbfffdc34000 &bm_pte[0][52] 를 리턴 + */ return 
&bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; } @@ -57,6 +70,7 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, if (pud_none(pud)) __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + /* IAMROOT20_END 20231202 */ /* IAMROOT20_START 20231209 */ pmdp = pmd_offset_kimg(pudp, addr); do { @@ -86,6 +100,7 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = pud_offset_kimg(p4dp, addr); + /* IAMROOT20_END 20231125 */ /* IAMROOT20_START 20231202 */ early_fixmap_init_pmd(pudp, addr, end); } @@ -99,7 +114,11 @@ void __init early_fixmap_init(void) { unsigned long addr = FIXADDR_TOT_START; unsigned long end = FIXADDR_TOP; - + + /* IAMROOT20 20231125 + * *pgdp = init_pg_dir + pgd + * *p4dp = *pgdp; + */ pgd_t *pgdp = pgd_offset_k(addr); p4d_t *p4dp = p4d_offset(pgdp, addr); @@ -118,9 +137,17 @@ void __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + /* IAMROOT20 20231216 + * idx = FIX_PMD 일 경우 + * addr = 0xfffffbfffdc34000 이며 + * bm_pte[0][52]의 주소를 가져옴. + */ ptep = fixmap_pte(addr); if (pgprot_val(flags)) { + /* IAMROOT20 20231216 + * phys주소를 bm_pte[][]에 쓴다(매핑한다). + */ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); } else { pte_clear(&init_mm, addr, ptep); @@ -142,6 +169,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) * fields of the FDT header after mapping the first chunk, double check * here if that is indeed the case. 
*/ + /* IAMROOT20 20231209 + * MIN_FDT_ALIGN은 최소 8이어야 한다 + */ BUILD_BUG_ON(MIN_FDT_ALIGN < 8); if (!dt_phys || dt_phys % MIN_FDT_ALIGN) return NULL; @@ -173,6 +203,10 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) */ void __init fixmap_copy(pgd_t *pgdir) { + /* IAMROOT20 20240420 + * init_pg_dir에서 FIXADDR_TOT_START 주소가 가리키는 index의 descriptor를 + * swapper_pg_dir에 복사한다 + */ if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) { /* * The fixmap falls in a separate pgd to the kernel, and doesn't diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 95364e8bdc194..039710f5971e3 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,8 +41,15 @@ void __init arm64_hugetlb_cma_reserve(void) int order; if (pud_sect_supported()) + /* IAMROOT20 20240601 + * 30 - 12 = 18 + */ order = PUD_SHIFT - PAGE_SHIFT; else + /* IAMROOT20 20240706 + * ex) 16K + * 30 - 14 = 16 + */ order = CONT_PMD_SHIFT - PAGE_SHIFT; /* diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66e70ca476805..c8ba15a0c4613 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -62,6 +62,10 @@ EXPORT_SYMBOL(memstart_addr); * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. 
*/ +/* IAMROOT20 20240810 + * arm64_dma_phys_limit 0x1_0000_0000 + * zone_sizes_init() + */ phys_addr_t __ro_after_init arm64_dma_phys_limit; /* Current arm64 boot protocol requires 2MB alignment */ @@ -192,9 +196,19 @@ static void __init reserve_crashkernel(void) */ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { + /* IAMROOT20 20240803 + * DMA_BIT_MASK(32) = 0xFFFF_FFFF + */ phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits); phys_addr_t phys_start = memblock_start_of_DRAM(); + /* IAMROOT20 20240803 + * phys_start > 4G 인 경우 + * - zone_mask = 0xFFFF_FFFF_FFFF_FFFF + * + * 4G >= phys_start > zone_mask + * - zone_mask = 0xFFFF_FFFF + */ if (phys_start > U32_MAX) zone_mask = PHYS_ADDR_MAX; else if (phys_start > zone_mask) @@ -205,6 +219,18 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) static void __init zone_sizes_init(void) { + /* IAMROOT20 20240803 + * MAX_NR_ZONES 4 + * - config에 따라 MAX_NR_ZONES가 달라짐 + * - default config에서는 ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE + * + * dma32_phys_limit = 0xFFFF_FFFF + */ + /* IAMROOT20 20240810 + * max_zone_pfns[ZONE_DMA] = 0x100000 + * max_zone_pfns[ZONE_DMA32] = 0x100000 + * max_zone_pfns[ZONE_NORMAL] = max_pfn + */ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; @@ -212,6 +238,10 @@ static void __init zone_sizes_init(void) #ifdef CONFIG_ZONE_DMA acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); + /* IAMROOT20 20240810 + * of_dma_get_max_cpu_address(NULL) + * - "dma-ranges"의 dma 주소를 cpu 주소로 변환했을 때, max 주소 값을 return + */ dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL)); zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits); arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); @@ -260,6 +290,11 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { + /* IAMROOT20 20240330 + * ex) PAGE_END = 0xffff_8000_0000_0000 + * 
_PAGE_OFFSET(vabits_actual) = 0xffff_0000_0000_0000 + * 0xffff_0000_0000_0000 ~ 0xffff_8000_0000_0000은 linear + */ s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); /* @@ -277,11 +312,19 @@ void __init arm64_memblock_init(void) } /* Remove memory above our supported physical address size */ + /* IAMROOT20 20240330 + * ex) pa 48bit 사용 시 + * 0x0001_0000_0000_0000 ~ 0xffff_ffff_ffff_ffff의 범위의 물리 메모리는 사용되지 않음으로 memblock.memory에서 제거 + */ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* * Select a suitable value for the base of physical memory. */ + /* IAMROOT20 20240330 + * memblock.memory.regions[0]의 base를 ARM64_MEMSTART_ALIGN(ex 1GB)에 맞춰서 내림하여 memstart_addr을 설정 + * ex) memstart_addr = 0x0000_0000_4000_0000 + */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); @@ -293,12 +336,26 @@ void __init arm64_memblock_init(void) * linear mapping. Take care not to clip the kernel which may be * high in memory. */ + /* IAMROOT20 20240330 + * ex) memstart_addr = 0x0000_0000_4000_0000 + * linear_region_size = 0x0000_8000_0000_0000 + * memstart_addr + linear_region_size과 __pa_symbol(_end) 중 더 큰 값부터 0xffff_ffff_ffff_ffff까지 범위를 memblock.memory에서 제거 + */ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa_symbol(_end)), ULLONG_MAX); + /* IAMROOT20 20240330 + * memstart_addr부터 memblock_end_of_DRAM()까지의 크기가 linear_region_size보다 큰 경우 + */ if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) { /* ensure that memstart_addr remains sufficiently aligned */ + /* IAMROOT20 20240330 + * memblock_end_of_DRAM()에서 아래쪽으로 linear_region_size만큼으로 memstart_addr을 재설정 + */ memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size, ARM64_MEMSTART_ALIGN); + /* IAMROOT20 20240330 + * 0부터 새로운 memstart_addr까지 memblock.memory에서 제거 + */ memblock_remove(0, memstart_addr); } @@ -309,6 +366,10 @@ void __init arm64_memblock_init(void) * we have to move it upward. 
Since memstart_addr represents the * physical address of PAGE_OFFSET, we have to *subtract* from it. */ + /* IAMROOT20 20240330 + * CONFIG_ARM64_VA_BITS_52이고 vabits_actual이 52bit가 아닐 경우 + * 52bit 기준으로 설정된 가상 주소를 48bit 기준으로 변경 + */ if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52); @@ -317,17 +378,27 @@ void __init arm64_memblock_init(void) * high up in memory, add back the kernel region that must be accessible * via the linear mapping. */ + /* IAMROOT20 20240330 + * arm64의 경우 memory_limit는 PHYS_ADDR_MAX로 선언할 때 초기화되어 있음 + * 만약 boot parameter의 mem으로 값이 설정되어 들어왔다면 memory_limit는 해당 값으로 변경되어 있었을 것 + */ if (memory_limit != PHYS_ADDR_MAX) { memblock_mem_limit_remove_map(memory_limit); memblock_add(__pa_symbol(_text), (u64)(_end - _text)); } + /* IAMROOT20 20240330 + * fdt에서 initrd의 메모리 영역을 설정했다면 + */ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) { /* * Add back the memory we just removed if it results in the * initrd to become inaccessible via the linear mapping. * Otherwise, this is a no-op */ + /* IAMROOT20 20240330 + * base와 size를 설정 + */ u64 base = phys_initrd_start & PAGE_MASK; u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; @@ -339,23 +410,49 @@ void __init arm64_memblock_init(void) * each other) so that all granule/#levels combinations can * always access both. 
*/ + /* IAMROOT20 20240330 + * base가 memblock_start_of_DRAM()보다 작거나 + * (base + size)가 (memblock_start_of_DRAM() + linear_region_size)보다 크면 + * initrd의 메모리 영역이 linear mapping될 수 없으므로 warning + */ if (WARN(base < memblock_start_of_DRAM() || base + size > memblock_start_of_DRAM() + linear_region_size, "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) { phys_initrd_size = 0; } else { + /* IAMROOT20 20240330 + * initrd의 메모리 영역을 memblock.memory에 등록 + */ memblock_add(base, size); + /* IAMROOT20 20240330 + * memblock.memory의 해당 region에서 MEMBLOCK_NOMAP flag를 clear + */ memblock_clear_nomap(base, size); + /* IAMROOT20 20240330 + * initrd의 메모리 영역을 memblock.reserved에 등록 + */ memblock_reserve(base, size); } } if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; + /* IAMROOT20 20240330 + * id_aa64mmfr0_el1 레지스터 값을 읽음 + */ u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + /* IAMROOT20 20240330 + * id_aa64mmfr0_el1.parange 필드 값을 추출 + * ex) pa 48bit, parange = 0b0101 + */ int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); + /* IAMROOT20 20240330 + * ex) pa 48bit + * range = 0x0000_8000_0000_0000 - 0x0001_0000_0000_0000 + * range = -0x0000_8000_0000_0000 + */ s64 range = linear_region_size - BIT(id_aa64mmfr0_parange_to_phys_shift(parange)); @@ -364,6 +461,9 @@ void __init arm64_memblock_init(void) * margin, the size of the region that the physical memory can * span, randomize the linear region as well. */ + /* IAMROOT20 20240330 + * ex) pa 48bit일 경우 range가 음수이므로 해당 if문에 진입하지 못함 + */ if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) { range /= ARM64_MEMSTART_ALIGN; memstart_addr -= ARM64_MEMSTART_ALIGN * @@ -375,7 +475,13 @@ void __init arm64_memblock_init(void) * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. 
*/ + /* IAMROOT20 20240330 + * kernel image 물리 주소 region을 memblock.reserved에 저장 + */ memblock_reserve(__pa_symbol(_stext), _end - _stext); + /* IAMROOT20 20240330 + * initrd_start, initrd_end에 가상주소를 저장 + */ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) { /* the generic initrd code expects virtual addresses */ initrd_start = __phys_to_virt(phys_initrd_start); @@ -384,6 +490,11 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + /* IAMROOT20 20240413 + * __va : 물리 주소를 '리니어 커널 메모리 매핑 영역'의 가상 주소로 변환 + * - 리니어 커널 메모리 매핑 영역(4K, VA_BITS=48) + * : PAGE_OFFSET(0xffff_0000_0000_0000) ~ 0xffff_8000_0000_0000 + */ high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -412,6 +523,9 @@ void __init bootmem_init(void) dma_pernuma_cma_reserve(); + /* IAMROOT20 20240720 + * kvm은 분석하지 않음 + */ kvm_hyp_reserve(); /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index af6bc8403ee46..79f817934b7bb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -118,6 +118,10 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * slot will be free, so we can (ab)use the FIX_PTE slot to initialise * any level of table. */ + /* IAMROOT20 20240420 + * cpu에서는 물리주소(phys)에 접근할 수 없기 때문에, FIX_PTE에 mapping + * 하여 cpu에서 접근할 수 있도록 설정 + */ ptr = pte_set_fixmap(phys); memset(ptr, 0, PAGE_SIZE); @@ -126,6 +130,9 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * Implicit barriers also ensure the zeroed page is visible to the page * table walker */ + /* IAMROOT20 20240420 + * cpu에서 가상 주소로 page를 clear해 준다음, FIX_PTE mapping을 제거 + */ pte_clear_fixmap(); return phys; @@ -219,6 +226,16 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, do { pgprot_t __prot = prot; + /* IAMROOT20 20231216 + * granule size | cont PTE | cont PMD | + * -------------+------------+------------+ + * 4 KB | 64 KB | 32 MB | + * 16 KB | 2 MB | 1 GB* | + * 64 KB | 2 MB | 16 GB* | + * + * 간략히 설명하자면 아래와 같다. 
+ * next = min(addr + (cont PTE), end); + */ next = pte_cont_addr_end(addr, end); /* use a contiguous mapping if the range is suitably aligned */ @@ -239,10 +256,17 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, unsigned long next; pmd_t *pmdp; + /* IAMROOT20 20231216 + * FIX_PMD 가상주소를 매핑 + */ pmdp = pmd_set_fixmap_offset(pudp, addr); do { pmd_t old_pmd = READ_ONCE(*pmdp); + /* IAMROOT20 20231216 + * exam) addr: 0xfffffbfffddfe000 end: 0xfffffbfffde11000 + * next: 0xfffffbfffde00000 + */ next = pmd_addr_end(addr, end); /* try section mapping first */ @@ -300,9 +324,16 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, next = pmd_cont_addr_end(addr, end); /* use a contiguous mapping if the range is suitably aligned */ + /* IAMROOT20 20231209 + * CONT_PMD_MASK = ~(32MB - 1) + * + * addr, next, phys가 모두 32MB 로 정렬되어 있는지 확인 + * NO_CONT_MAPPINGS 플래그가 set되어 있지 않은지 확인 + */ if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + /* IAMROOT20_END 20231209 */ /* IAMROOT20_START 20231216 */ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); @@ -327,12 +358,21 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, if (flags & NO_EXEC_MAPPINGS) p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); + /* IAMROOT20 20240420 + * ex) pgtable_alloc = early_pgtable_alloc의 경우 + * - memblock에서 1 PAGE를 할당하고, 물리 주소를 return + */ pud_phys = pgtable_alloc(PUD_SHIFT); __p4d_populate(p4dp, pud_phys, p4dval); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); - + + /* IAMROOT20 20231209, 20240420 + * - pudp(물리주소) = (*p4dp) + pud_index(addr)를 FIX_PUD에 매핑한다 + * - pudp(가상주소) = virt(FIX_PUD) + (pudp 물리주소 & (PAGE_SIZE - 1))를 + * return + */ pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -382,8 +422,14 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, if (WARN_ON((phys ^ virt) & 
~PAGE_MASK)) return; + /* IAMROOT20 20231209 + * virt, phys 주소를 page size만큼 round down + */ phys &= PAGE_MASK; addr = virt & PAGE_MASK; + /* IAMROOT20 20231209 + * virt + size 주소를 page size 단위로 round up + */ end = PAGE_ALIGN(virt + size); do { @@ -564,6 +610,11 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { static void __init map_mem(pgd_t *pgdp) { + /* IAMROOT20 20240427 + * ex) VA_BITS_MIN = 48인 경우 + * _PAGE_END(48) = 0xFFFF_8000_0000_0000 + * : 리니어 커널 메모리 매핑 영역의 끝 주소를 의미 + */ static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); @@ -592,9 +643,17 @@ static void __init map_mem(pgd_t *pgdp) * So temporarily mark them as NOMAP to skip mappings in * the following for-loop */ + /* IAMROOT20 20240427 + * read-only text와 rodata 섹션을 nomap으로 설정하여 + * 아래 for-loop에서 매핑하는 것을 방지 + */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); /* map all the memory banks */ + /* IAMROOT20 20240427 + * memblock.memory의 모든 region을 순회하며 + * 리니어 매핑 영역(0xFFFF_0000_0000_0000 ~ 0xFFFF_8000_0000_0000)에 매핑한다 + */ for_each_mem_range(i, &start, &end) { if (start >= end) break; @@ -603,6 +662,12 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ + /* IAMROOT20 20240427 + * MTE : ARMv8.5 에서 추가된 보안기법으로, 메모리 할당/해제 연산마다 + * 사용되는 포인터와 (2) 접근하는 메모리 간에 상호 태그를 하고, + * 태그 정보 비교를 통해 안전한 접근인지 체크하는 일종의 Sanitizer 기법이다. + * https://velog.io/@pensieveview/MTE-Memory-Tagging-Extension-%EB%A9%94%EB%AA%A8%EB%A6%AC%ED%83%9C%EA%B9%85 + */ __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), flags); } @@ -617,6 +682,10 @@ static void __init map_mem(pgd_t *pgdp) * Note that contiguous mappings cannot be remapped in this way, * so we should avoid them here. 
*/ + /* IAMROOT20 20240427 + * 커널 영역을 커널 페이지 속성으로 리니어 영역에 매핑 + * - contiguous 매핑 허용하지 않음 + */ __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); @@ -723,6 +792,9 @@ static void __init map_kernel(pgd_t *pgdp) * mapping to install SW breakpoints. Allow this (only) when * explicitly requested with rodata=off. */ + /* IAMROOT20 20240420 + * SW breakpoint -> "b ." + */ pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; /* @@ -748,6 +820,8 @@ static void __init map_kernel(pgd_t *pgdp) map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); fixmap_copy(pgdp); + /* IAMROOT20_END 20240420 */ + /* IAMROOT20_START 20240427 */ kasan_copy_shadow(pgdp); } @@ -759,12 +833,25 @@ static void __init create_idmap(void) u64 pgd_phys; /* check if we need an additional level of translation */ + /* IAMROOT20 20240511 + * 물리 주소의 idmap이 동일한 주소의 유저 가상 주소 공간에 배치가 불가능한 경우 + * -> 테이블 단계를 증가시켜 유저 가상 주소 공간을 키워 매핑하게 함 + * + * ex) VA_BITS = 42, VA_BITS_MIN = 42 + * idmap_t0sz = 16 + * if( (42 < 48) && (16 < 22) ) -> 페이지 테이블 단계를 증가시킴 + */ if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { pgd_phys = early_pgtable_alloc(PAGE_SHIFT); set_pgd(&idmap_pg_dir[start >> VA_BITS], __pgd(pgd_phys | P4D_TYPE_TABLE)); pgd = __va(pgd_phys); } + /* IAMROOT20 20240511 + * __idmap_text_start ~ __idmap_text_end 까지를 idmap_pg_dir에 매핑한다 + * idmap_pg_dir : 가상 주소와 물리 주소가 1:1로 매핑되어 사용될 때 + * 필요한 테이블로 영구적으로 사용 + */ __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, early_pgtable_alloc, 0); @@ -786,6 +873,13 @@ void __init paging_init(void) pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); extern pgd_t init_idmap_pg_dir[]; + /* IAMROOT20 20240420 + * idmap_t0sz = ID map 영역이 mapping 할 수 있는 크기 + * - _end가 2^VA_BIT_MIN보다 큰 주소 위치에 있을 경우를 대비하여 설정 + * + * ex) __fls(0b1110) = 3 + * cf. 
__ffs(0b1110) = 1 + */ idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); map_kernel(pgdp); @@ -796,9 +890,17 @@ void __init paging_init(void) cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; + /* IAMROOT20 20240511 + * 부팅 초기에 사용했던 init_pg_dir이 swapper_pg_dir로 대체되었고, + * init_pg_dir을 memblock.reserved에서 지운다 + */ memblock_phys_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + /* IAMROOT20 20240511 + * 이 함수 호출 이후에 memblock_double_array에서 memblock array가 부족할 때 + * 크기를 2배씩 증가시킬 수 있다 + */ memblock_allow_resize(); create_idmap(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2cb437821ca4..40f51e310e0d0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -171,6 +171,10 @@ SYM_FUNC_END(cpu_do_resume) .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, reserved_pg_dir + /* IAMROOT20 20240511 + * PA_BITS가 52가 아닌 경우에는 + * -> phys_to_ttbr : mov tmp2, tmp1 + */ phys_to_ttbr \tmp2, \tmp1 offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 @@ -187,6 +191,9 @@ SYM_FUNC_END(cpu_do_resume) * called by anything else. It can only be executed from a TTBR0 mapping. */ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) + /* IAMROOT20 20240511 + * msr ttbr1_el1, reserved_pg_dir + */ __idmap_cpu_set_reserved_ttbr1 x1, x3 offset_ttbr1 x0, x3 @@ -413,7 +420,11 @@ SYM_FUNC_START(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 - + /* IAMROOT20_END 20231014 */ /* IAMROOT20_START 20231021 */ + /* IAMROOT20 20231021 + * mair(x17) : Memory Attribute Indirection Register + * tcr(x16) : Translation Control Register + */ /* * Default values for VMSA control registers. These will be adjusted * below depending on detected CPU features. 
@@ -428,10 +439,17 @@ SYM_FUNC_START(__cpu_setup) tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 + /* IAMROOT20 20231021 + * x0 = 48 || 52 + * x9 = 16 || 12 + */ sub x9, xzr, x0 add x9, x9, #64 tcr_set_t1sz tcr, x9 #else + /* IAMROOT20 20231021 + * x9 = 16 + */ idmap_get_t0sz x9 #endif tcr_set_t0sz tcr, x9 diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567d1ec..fb2c0a22220a3 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -15,12 +15,54 @@ #include +/* IAMROOT20 20240824 + * setup_arch -> bootmem_init -> arch_numa_init -> numa_init -> + * numa_register_nodes -> setup_node_data + */ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); +/* IAMROOT20 20240525 + * numa_nodes_parsed = { bits[1] } + * + * numa_init + * nodes_clear(numa_nodes_parsed) + */ +/* IAMROOT20 20240615 + * numa_add_memblk + * node_set(nid, numa_nodes_parsed); + */ nodemask_t numa_nodes_parsed __initdata; static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +/* IAMROOT20 20240525 + * numa_distance_cnt = 16 + */ static int numa_distance_cnt; +/* IAMROOT20 20240525 + * 16x16 2차원 행열을 만든다. numa_alloc_distance + * { { 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * { 20, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * { 20, 20, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * ... + * { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 10} } + */ +/* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + * { { 10, 15, 20, 25, 20, ..., 20}, + * { 15, 10, 25, 30, 20, ..., 20}, + * { 20, 25, 10, 15, 20, ..., 20}, + * { 25, 30, 15, 10, 20, ..., 20}, + * ... 
+ * { 20, 20, 20, 20, 20, ..., 10} } + */ static u8 *numa_distance; bool numa_off; @@ -239,7 +281,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); - + /* IAMROOT20 20240622 + * NODE_DATA(nid) : node_data[] 배열에서 nid 인덱스에 해당하는 pglist_data의 주소 반환 + */ node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); NODE_DATA(nid)->node_id = nid; @@ -258,7 +302,9 @@ void __init numa_free_distance(void) if (!numa_distance) return; - + /* IAMROOT20 20240622 + * size = 16 x 16 x 1 = 256 + */ size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); @@ -275,7 +321,17 @@ static int __init numa_alloc_distance(void) size_t size; int i, j; + /* IAMROOT20 20240525 + * ex) MAX_NUMNODES > 1 이면 + * - nr_node_ids = MAX_NUMNODES = 16 + * -> size = 16 * 16 * 1(u8) = 256 + * + * numa_distance : 16 x 16 배열, 원소 하나는 1byte + */ size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); + /* IAMROOT20 20240525 + * 16x16 2차원 행렬을 만든다. 
+ */ numa_distance = memblock_alloc(size, PAGE_SIZE); if (WARN_ON(!numa_distance)) return -ENOMEM; @@ -363,6 +419,9 @@ static int __init numa_register_nodes(void) unsigned long start_pfn, end_pfn; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + /* IAMROOT20 20240622 + * nid에 해당하는 pglist_data의 필드를 설정 + */ setup_node_data(nid, start_pfn, end_pfn); node_set_online(nid); } @@ -373,6 +432,12 @@ static int __init numa_register_nodes(void) return 0; } +/* IAMROOT20 20240525 + * __init arch_numa_init + * acpi_enable numa_init(arch_acpi_numa_init) + * acpi_disabled numa_init(of_numa_init) + * etc numa_init(dummy_numa_init) + */ static int __init numa_init(int (*init_func)(void)) { int ret; @@ -389,6 +454,8 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) goto out_free_distance; + /* IAMROOT20_END 20240615 */ + /* IAMROOT20_START 20240622 */ if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); ret = -EINVAL; @@ -399,6 +466,10 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) goto out_free_distance; + /* IAMROOT20_END 20240622 */ + /* IAMROOT20_START 20240629 + * 각 노드에 대한 비트맵 형태의 CPU 마스크를 할당하고 초기화 + */ setup_node_to_cpumask_map(); return 0; @@ -418,6 +489,11 @@ static int __init numa_init(int (*init_func)(void)) * * Return: 0 on success, -errno on failure. 
*/ +/* IAMROOT20 20240706 + * NUMA를 설정한 시스템이 아닌 경우, 더미 데이터를 위해 + * memblock memory 영역의 region 전체의 nid를 0으로 설정 후 + * numa_nodes_parsed의 0번째 비트를 1로 설정 + */ static int __init dummy_numa_init(void) { phys_addr_t start = memblock_start_of_DRAM(); diff --git a/drivers/of/address.c b/drivers/of/address.c index e692809ff8227..419f16357600e 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -66,7 +66,25 @@ static u64 of_bus_default_map(__be32 *addr, const __be32 *range, int na, int ns, int pna) { u64 cp, s, da; - + /* IAMROOT20 20240615 + * ex) ethernet@0,0 { + * compatible = "smc,smc91c111" + * reg = <0 0 0x1000>; + * }; + * + * ex) range = <0 0 0x10100000 0x10000>; + * + * range : 자식 디바이스가 가질 수 있는 버스 주소의 범위 + * reg : 자식 디바이스의 주소 + * + * cp = 0x0000_0000_0000_0000; + * s = 0x0000_0000_0001_0000; + * addr = reg; + * da = 0x0000_0000_0000_0000; + * + * 자식 디바이스의 버스 범위의 시작 주소와, + * 자식 디바이스의 주소의 차를 반환 + */ cp = of_read_number(range, na); s = of_read_number(range + na + pna, ns); da = of_read_number(addr, na); @@ -342,6 +360,38 @@ static int of_bus_default_flags_match(struct device_node *np) /* * Array of bus specific translators */ +/* IAMROOT20 20240608 + * https://github.com/rockchip-toybrick/u-boot/blob/master/common/fdt_support.c + * + * struct of_bus - Callbacks for bus specific translators + * @name: A string used to identify this bus in debug output. + * @addresses: The name of the DT property from which addresses are + * to be read, typically "reg". + * @match: Return non-zero if the node whose parent is at + * parentoffset in the FDT blob corresponds to a bus + * of this type, otherwise return zero. If NULL a match + * is assumed. + * @count_cells:Count how many cells (be32 values) a node whose parent + * is at parentoffset in the FDT blob will require to + * represent its address (written to *addrc) & size + * (written to *sizec). 
+ * @map: Map the address addr from the address space of this + * bus to that of its parent, making use of the ranges + * read from DT to an array at range. na and ns are the + * number of cells (be32 values) used to hold and address + * or size, respectively, for this bus. pna is the number + * of cells used to hold an address for the parent bus. + * Returns the address in the address space of the parent + * bus. + * @translate: Update the value of the address cells at addr within an + * FDT by adding offset to it. na specifies the number of + * cells used to hold the address being translated. Returns + * zero on success, non-zero on error. + * + * Each bus type will include a struct of_bus in the of_busses array, + * providing implementations of some or all of the functions used to + * match the bus & handle address translation for its children. + */ static struct of_bus of_busses[] = { #ifdef CONFIG_PCI @@ -466,9 +516,25 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, pr_debug("walking ranges...\n"); /* Now walk through the ranges */ + /* IAMROOT20 20240615 + * ranges = <자식주소 부모주소 자식주소 크기>; + * + * ranges = <0 0 0x10100000 0x10000 + * 1 0 0x10160000 0x10000 + * 2 0 0x30000000 0x1000000>; + * + * rlen = 12 * 4; + * rlen /= 4 = 12; + * rone = 4 = ranges 필드 하나 + */ rlen /= 4; rone = na + pna + ns; for (; rlen >= rone; rlen -= rone, ranges += rone) { + /* IAMROOT20 20240615 + * of_busses[] = { ... + * .map = of_bus_default_map, + * ... }; + */ offset = bus->map(addr, ranges, na, ns, pna); if (offset != OF_BAD_ADDR) break; @@ -477,13 +543,24 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, pr_debug("not found !\n"); return 1; } + /* IAMROOT20 20240615 + * addr에는 ranges의 OF_BAD_ADDR가 아닌 필드의 부모 주소가 복사된다. + */ memcpy(addr, ranges + na, 4 * pna); finish: of_dump_addr("parent translation for:", addr, pna); pr_debug("with offset: %llx\n", offset); + /* IAMROOT20 20240615 + * of_busses[] = { ... 
+ * .translate = of_bus_default_translate, + * ... }; + */ /* Translate it into parent bus space */ + /* IAMROOT20 20240615 + * addr에 offset을 더해서 반환. + */ return pbus->translate(addr, offset, pna); } @@ -522,6 +599,9 @@ static u64 __of_translate_address(struct device_node *dev, parent = get_parent(dev); if (parent == NULL) goto bail; + /* IAMROOT20 20240608 + * of_match_bus 안에 BUG() 함수가 있으므로 NULL이 반환되지 않음. + */ bus = of_match_bus(parent); /* Count address cells & copy address locally */ @@ -530,6 +610,9 @@ static u64 __of_translate_address(struct device_node *dev, pr_debug("Bad cell count for %pOF\n", dev); goto bail; } + /* IAMROOT20 20240608 + * addr에 in_addr(노드의 reg property)를 4바이트 단위로 복사 + */ memcpy(addr, in_addr, na * 4); pr_debug("bus is %s (na=%d, ns=%d) on %pOF\n", @@ -577,6 +660,18 @@ static u64 __of_translate_address(struct device_node *dev, pbus->name, pna, pns, parent); /* Apply bus translation */ + /* IAMROOT20 20240615 + * dev : 함수를 호출한 디바이스의 부모 디바이스 + * bus : 부모 디바이스의 버스 + * pbus : 조부모 디바이스의 버스 + * addr : + * - 함수를 호출한 디바이스의 reg 주소 + * - 이전 루프의 부모 주소 + offset + * na : 부모 디바이스의 address cells 크기 + * ns : 부모 디바이스의 size cells 크기 + * pna : 조부모 디바이스의 address cells 크기 + * rprop : ranges의 property + */ if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop)) break; @@ -599,6 +694,8 @@ u64 of_translate_address(struct device_node *dev, const __be32 *in_addr) struct device_node *host; u64 ret; + /* IAMROOT20_END 20240608 */ + /* IAMROOT20_START 20240615 */ ret = __of_translate_address(dev, of_get_parent, in_addr, "ranges", &host); if (host) { @@ -616,10 +713,28 @@ struct device_node *__of_get_dma_parent(const struct device_node *np) struct of_phandle_args args; int ret, index; + /* IAMROOT20 20240810 + * index = of_property_match_string(np, "interconnect-names", "dma-mem"); + * interconnect-names property에서 "dma-mem"이 몇 번째인지를 return + * + * ex) interconnect-names = "dma-mem", "write"; + * -> index = 0 + */ index = of_property_match_string(np, 
"interconnect-names", "dma-mem"); if (index < 0) return of_get_parent(np); + /* IAMROOT20 20240810 + * #interconnect-cells 값이 index인 node를 찾는다 + * + * ex) emc: external-memory-controller@2c60000 { + * ... + * #interconnect-cells = <0>; + * ... + * }; + * + * -> #interconnect-cells 값이 0인 위 emc node를 return + */ ret = of_parse_phandle_with_args(np, "interconnects", "#interconnect-cells", index, &args); @@ -727,12 +842,20 @@ const __be32 *__of_get_address(struct device_node *dev, int index, int bar_no, return NULL; psize /= 4; + /* IAMROOT20 20240608 + * reg = <0x0 0x00000000 0x0 0x40000000>; + * na = 2, ns = 2 + */ onesize = na + ns; for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) { u32 val = be32_to_cpu(prop[0]); /* PCI bus matches on BAR number instead of index */ if (((bar_no >= 0) && ((val & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0))) || ((index >= 0) && (i == index))) { + /* IAMROOT20 20240608 + * *size = 0x0000_0000_4000_0000; + * *flags = 0x0000_0200 = IORESOURCE_MEM; + */ if (size) *size = of_read_number(prop + na, ns); if (flags) @@ -772,6 +895,13 @@ static int parser_init(struct of_pci_range_parser *parser, { int rlen; + /* IAMROOT20_20240803 + * pna = 현재 노드의 parent의 #address-cells + * na = 현재 노드의 #address-cells 이지만, + * 없으면 #address-cells을 가진 parent까지 거슬러 올라감 + * ns = 현재 노드의 #size-cells 이지만, + * 없으면 #size-cells를 가진 parent까지 거슬러 올라감 + */ parser->node = node; parser->pna = of_n_addr_cells(node); parser->na = of_bus_n_addr_cells(node); @@ -779,6 +909,14 @@ static int parser_init(struct of_pci_range_parser *parser, parser->dma = !strcmp(name, "dma-ranges"); parser->bus = of_match_bus(node); + /* IAMROOT20_20240803 + * #address-cells = <3> + * #size-cells = <2> + * + * dma-ranges = <0x02000000 0 0x00000000 0x80000000 0 0x20000000>; + * parser->range = dma-ranges의 property 구조체의 value 값 + * rlen = 6 + */ parser->range = of_get_property(node, name, &rlen); if (parser->range == NULL) return -ENOENT; @@ -806,6 +944,12 @@ 
EXPORT_SYMBOL_GPL(of_pci_dma_range_parser_init); struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, struct of_pci_range *range) { + /* IAMROOT20 20240810 + * ex) na = 3, ns = 2, pna = 1 + * dma-ranges = <0x02000000 0 0x00000000 0x00000000 0 0x40000000>; + * |---------------------| |--------| |----------| + * na pna ns + */ int na = parser->na; int ns = parser->ns; int np = parser->pna + na + ns; @@ -817,25 +961,48 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, if (!parser->range || parser->range + np > parser->end) return NULL; + /* IAMROOT20 20240810 + * flags 값을 읽어 저장 + * dma-ranges = <0x02000000 0 0x00000000 ... >; + * ^--- flags + */ range->flags = parser->bus->get_flags(parser->range); /* A extra cell for resource flags */ if (parser->bus->has_flags) busflag_na = 1; + /* IAMROOT20 20240810 + * bus 주소를 읽음 + * dma-ranges = <0x02000000 0 0x00000000 ... >; + * |----------| + * bus_addr + */ range->bus_addr = of_read_number(parser->range + busflag_na, na - busflag_na); + /* IAMROOT20 20240810 + * cpu 주소를 변환하여 저장 + */ if (parser->dma) range->cpu_addr = of_translate_dma_address(parser->node, parser->range + na); else range->cpu_addr = of_translate_address(parser->node, parser->range + na); + /* IAMROOT20 20240810 + * bus 크기를 저장 + */ range->size = of_read_number(parser->range + parser->pna + na, ns); + /* IAMROOT20 20240810 + * 다음 range로 이동 + */ parser->range += np; /* Now consume following elements while they are contiguous */ + /* IAMROOT20 20240810 + * range가 연속으로 이어져 있는 경우를 처리하기 위함 + */ while (parser->range + np <= parser->end) { u32 flags = 0; u64 bus_addr, cpu_addr, size; @@ -850,12 +1017,20 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, parser->range + na); size = of_read_number(parser->range + parser->pna + na, ns); + /* IAMROOT20 20240810 + * - flags가 다르거나 + * - bus 주소, cpu 주소가 연속적이지 않은 경우 + * => break + */ if (flags != range->flags) break; if (bus_addr != 
range->bus_addr + range->size || cpu_addr != range->cpu_addr + range->size) break; + /* IAMROOT20 20240810 + * 주소가 연속적이면 size만 더해줌 + */ range->size += size; parser->range += np; } @@ -1005,9 +1180,16 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np) if (!np) np = of_root; + /* IAMROOT20_START 20240810 */ + /* IAMROOT20 20240810 + * np(현재 node)에서 "dma-ranges" property가 있는지 확인 + */ ranges = of_get_property(np, "dma-ranges", &len); if (ranges && len) { of_dma_range_parser_init(&parser, np); + /* IAMROOT20 20240810 + * parser에 저장된 dma-ranges 영역을 돌면서, cpu_end의 최대값을 구함 + */ for_each_of_range(&parser, &range) if (range.cpu_addr + range.size > cpu_end) cpu_end = range.cpu_addr + range.size - 1; @@ -1016,6 +1198,10 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np) max_cpu_addr = cpu_end; } + /* IAMROOT20 20240810 + * np(현재 node)의 자식 node에서 of_dma_get_max_cpu_address 함수를 + * 재귀적으로 호출 + */ for_each_available_child_of_node(np, child) { subtree_max_addr = of_dma_get_max_cpu_address(child); if (max_cpu_addr > subtree_max_addr) @@ -1116,6 +1302,12 @@ static int __of_address_to_resource(struct device_node *dev, int index, int bar_ if (of_mmio_is_nonposted(dev)) flags |= IORESOURCE_MEM_NONPOSTED; + /* IAMROOT20 20240615 + * r->start = 매핑된 디바이스의 시작 주소 + * r->end = 매핑된 디바이스의 끝 주소 + * r->flags = 디바이스의 flags + * r->name = 디바이스의 이름 + */ r->start = taddr; r->end = taddr + size - 1; r->flags = flags; diff --git a/drivers/of/base.c b/drivers/of/base.c index 166fb7d753378..5101b5e44cb9d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -55,6 +55,12 @@ DEFINE_MUTEX(of_mutex); */ DEFINE_RAW_SPINLOCK(devtree_lock); +/* IAMROOT20 20240525 + * ex) np = 'cpu0: cpu@10000' 노드, name = "cpu" + * + * node_name = "cpu@10000" + * len = 3 + */ bool of_node_name_eq(const struct device_node *np, const char *name) { const char *node_name; @@ -316,6 +322,11 @@ EXPORT_SYMBOL(of_get_property); * 10. type * 11. 
name */ +/* IAMROOT20 20240615 + * of_numa_parse_distance_map(void) + * of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); + * __of_device_is_compatible(np, "numa-distance-map-v1", NULL, NULL) + */ static int __of_device_is_compatible(const struct device_node *device, const char *compat, const char *type, const char *name) { @@ -644,6 +655,30 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev) unsigned long flags; struct device_node *node; + /* IAMROOT20 20240525 + * ex) + * cpus { + * #address-cells = <1>; + * #size-cells = <0>; + * + * cpu0: cpu@10000 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10000>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * numa-node-id = <0>; + * }; + * + * cpu1: cpu@10001 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10001>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * numa-node-id = <0>; + * }; + */ if (!prev) node = of_find_node_by_path("/cpus"); @@ -657,6 +692,10 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev) for (; next; next = next->sibling) { if (__of_device_is_fail(next)) continue; + /* IAMROOT20 20240525 + * node name이 "cpu"로 시작하는 지 확인 - ex) "cpu@10000" + * device_type = "cpu" 인지 확인 + */ if (!(of_node_name_eq(next, "cpu") || __of_node_is_type(next, "cpu"))) continue; @@ -717,6 +756,14 @@ struct device_node *of_get_child_by_name(const struct device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); +/* IAMROOT20 20240525 + * parent 노드의 모든 child의 마지막 path가 path와 일치하는 child를 return + * ex) parent : of_root("/"), path : "aliases" + * child : "/soc", "/chosen", "/aliases" + * -> "/aliases" 노드를 return + * + * __of_find_node_by_path(of_root, "aliases"); + */ struct device_node *__of_find_node_by_path(struct device_node *parent, const char *path) { @@ -735,6 +782,9 @@ struct device_node *__of_find_node_by_path(struct device_node *parent, return NULL; } +/* IAMROOT20 20240525 + * 
__of_find_node_by_full_path(of_root, "/aliases"); + */ struct device_node *__of_find_node_by_full_path(struct device_node *node, const char *path) { @@ -747,6 +797,14 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, node = __of_find_node_by_path(node, path); of_node_put(tmp); path = strchrnul(path, '/'); + /* IAMROOT20 20240525 + * separator가 NULL이 아닌 경우 while문 탈출 조건 + * ex) path = "/foo/bar:bao" + * ^---- separator + * 1-round) ^--- path + * 2-round) ^--- path + * : separator < path -> break + */ if (separator && separator < path) break; } @@ -771,19 +829,43 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ +/* IAMROOT20 20240525 + * ex) path = "i2c2_pins_a: i2c2" + * - opts가 null이 아니면 *opts에 ':' 다음 위치를 저장 + * + * of_find_node_by_path + * of_aliases = of_find_node_by_path("/aliases"); + * of_find_node_opts_by_path("/aliases", NULL); + * + * of_alias_scan + * of_find_node_opts_by_path("/pl011@9000000", &of_stdout_options); + */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; struct property *pp; unsigned long flags; const char *separator = strchr(path, ':'); - if (opts) *opts = separator ? separator + 1 : NULL; + /* IAMROOT20 20240525 + * path = "/" : root path인 경우 of_root를 return + */ if (strcmp(path, "/") == 0) return of_node_get(of_root); + /* IAMROOT20 20240525 + * aliases를 사용하는 path의 경우 + * ex1) path = "foo" + * ^----p + * ex2) path = "foo/bar" + * ^----p + * len : aliases의 길이 + * + * np : aliases가 가리키는 노드 ex) foo aliases의 노드 + * path : p + */ /* The path could begin with an alias */ if (*path != '/') { int len; @@ -887,6 +969,10 @@ EXPORT_SYMBOL(of_find_node_by_type); * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ +/* IAMROOT20 20240615 + * of_numa_parse_distance_map(void) + * of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); + */ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { @@ -1152,6 +1238,7 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) /* If phandle is 0, then it is an empty entry with no arguments. */ it->phandle = be32_to_cpup(it->cur++); + /* IAMROOT20_END 20240803 */ if (it->phandle) { /* @@ -1729,11 +1816,33 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = NULL; + /* IAMROOT20 20240525 + * exam) + * chosen { + * stdout-path = "/pl011@9000000"; + * }; + * name = "/pl011@9000000" + */ if (of_property_read_string(of_chosen, "stdout-path", &name)) of_property_read_string(of_chosen, "linux,stdout-path", &name); if (IS_ENABLED(CONFIG_PPC) && !name) of_property_read_string(of_aliases, "stdout", &name); + /* IAMROOT20 20240525 + * name으로 of_stdout node를 찾는다 + * - ':'이 name에 있으면(옵션 문자열), of_stdout_options에 저장 + * + * exam) name="/pl011@9000000" + * of_stdout = + * pl011@9000000 { + * clock-names = "uartclk\0apb_pclk"; + * clocks = <0x8000 0x8000>; + * interrupts = <0x00 0x01 0x04>; + * reg = <0x00 0x9000000 0x00 0x1000>; + * compatible = "arm,pl011\0arm,primecell"; + * }; + * of_stdout_options = NULL + */ if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) @@ -1743,6 +1852,11 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) if (!of_aliases) return; + /* IAMROOT20 20240525 + * of_aliases 노드의 property를 순회하면서 alias_prop 구조체를 만들어 + * aliases_lookup 리스트에 추가한다 + * - 나중에 aliases가 나오면 노드, name 등을 빠르게 찾기 위해 + */ for_each_property_of_node(of_aliases, pp) { const char *start = pp->name; const char *end = start + strlen(start); @@ -1756,10 +1870,28 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) !strcmp(pp->name, 
"linux,phandle")) continue; + /* IAMROOT20 20240525 + * eaxm) + * aliases { + * serial0 = &uart0; + * }; + * name = "serial0" + * value=&uart0 + */ np = of_find_node_by_path(pp->value); if (!np) continue; + /* IAMROOT20 20240525 + * aliases { + * serial0 = &uart0; + * ^----------- start = alias = "serial0" + * ^---- end 1 + * ^----- end 2 + * <> id = 0 + * <-----> stem = "serial" + * }; + */ /* walk the alias backwards to extract the id and work out * the 'stem' string */ while (isdigit(*(end-1)) && end > start) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index bf502ba8da958..b1ea6c0a0cab6 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -80,6 +80,10 @@ void __init of_fdt_limit_memory(int limit) } } +/* IAMROOT20 20240330 + * status가 없거나 + * status="ok" 또는 status="okay"이면 true + */ static bool of_fdt_device_is_available(const void *blob, unsigned long node) { const char *status = fdt_getprop(blob, node, "status", NULL); @@ -93,11 +97,18 @@ static bool of_fdt_device_is_available(const void *blob, unsigned long node) return false; } +/* IAMROOT20 20240518 + * 현재 *mem을 align에 맞춰 올림한 후 return + * *mem : *mem += size를 저장 + */ static void *unflatten_dt_alloc(void **mem, unsigned long size, unsigned long align) { void *res; + /* IAMROOT20 20240518 + * PTR_ALIGN(*mem, align) : *mem 값을 align 단위로 올림 + */ *mem = PTR_ALIGN(*mem, align); res = *mem; *mem += size; @@ -135,6 +146,10 @@ static void populate_properties(const void *blob, continue; } + /* IAMROOT20 20240518 + * 현재 node의 name property가 있다면, + * 아래 if (!has_name) 조건문에서 만들지 않는다 + */ if (!strcmp(pname, "name")) has_name = true; @@ -172,10 +187,18 @@ static void populate_properties(const void *blob, /* With version 0x10 we may not have the name property, * recreate it here from the unit name if absent */ + /* IAMROOT20 20240518 + * 현재 node의 name property를 만들어 연결한다 + */ if (!has_name) { const char *p = nodename, *ps = p, *pa = NULL; int len; + /* IAMROOT20 20240518 + * ex) p = "/soc/uart@7e201000" + * ^--- pa + 
* ^--- ps + */ while (*p) { if ((*p) == '@') pa = p; @@ -213,6 +236,10 @@ static int populate_node(const void *blob, const char *pathp; int len; + /* IAMROOT20 20240518 + * len : node name의 길이 저장 + * pathp : node name의 가상 주소(offset 아님) + */ pathp = fdt_get_name(blob, offset, &len); if (!pathp) { *pnp = NULL; @@ -221,11 +248,19 @@ static int populate_node(const void *blob, len++; + /* IAMROOT20 20240518 + * ex) *mem = 7, len = 7, sizeof(struct device_node) = 208 + * -> *mem = (7 + 1(align 올림)) + 7 + 208 = 223 + * np = (7 + 1(align 올림) = 8 + */ np = unflatten_dt_alloc(mem, sizeof(struct device_node) + len, __alignof__(struct device_node)); if (!dryrun) { char *fn; of_node_init(np); + /* IAMROOT20 20240518 + * device_node 끝에 node name을 저장 + */ np->full_name = fn = ((char *)np) + sizeof(*np); memcpy(fn, pathp, len); @@ -237,6 +272,9 @@ static int populate_node(const void *blob, } } + /* IAMROOT20 20240518 + * node의 '모든 property' + 'node name property'를 추가 + */ populate_properties(blob, offset, mem, np, pathp, dryrun); if (!dryrun) { np->name = of_get_property(np, "name", NULL); @@ -286,14 +324,29 @@ static int unflatten_dt_nodes(const void *blob, struct device_node *dad, struct device_node **nodepp) { + /* IAMROOT20 20240515 + * First pass + * unflatten_dt_nodes(initial_boot_params, NULL, NULL, NULL); + */ + /* IAMROOT20 20240518 + * Second pass + * unflatten_dt_nodes(initial_boot_params, mem, NULL, &of_root) + */ struct device_node *root; int offset = 0, depth = 0, initial_depth = 0; #define FDT_MAX_DEPTH 64 struct device_node *nps[FDT_MAX_DEPTH]; void *base = mem; + /* IAMROOT20 20240511 + * first pass - 크기 계산 -> dryrun = true + * second pass -> dryrun = false + */ bool dryrun = !base; int ret; + /* IAMROOT20 20240511 + * first pass - nodepp = null; + */ if (nodepp) *nodepp = NULL; @@ -319,7 +372,9 @@ static int unflatten_dt_nodes(const void *blob, if (!IS_ENABLED(CONFIG_OF_KOBJ) && !of_fdt_device_is_available(blob, offset)) continue; + /* IAMROOT20_END 20240511 */ + 
/* IAMROOT20_START 20240518 */ ret = populate_node(blob, offset, &mem, nps[depth], &nps[depth+1], dryrun); if (ret < 0) @@ -343,6 +398,9 @@ static int unflatten_dt_nodes(const void *blob, if (!dryrun) reverse_nodes(root); + /* IAMROOT20 20240518 + * first pass : (mem - base) 로 필요한 크기를 구함 + */ return mem - base; } @@ -368,6 +426,10 @@ void *__unflatten_device_tree(const void *blob, void *(*dt_alloc)(u64 size, u64 align), bool detached) { + /* + * __unflatten_device_tree(initial_boot_params, NULL, &of_root, + * early_init_dt_alloc_memory_arch, false); + */ int size; void *mem; int ret; @@ -411,6 +473,9 @@ void *__unflatten_device_tree(const void *blob, pr_debug(" unflattening %p...\n", mem); + /* IAMROOT20 20240518 + * unflatten_dt_nodes( initial_boot_params, mem, NULL, &of_root) + */ /* Second pass, do actual unflattening */ ret = unflatten_dt_nodes(blob, mem, dad, mynodes); @@ -479,6 +544,9 @@ static u32 of_fdt_crc32; static int __init early_init_dt_reserve_memory(phys_addr_t base, phys_addr_t size, bool nomap) { + /* IAMROOT20 20240330 + * nomap일 경우 + */ if (nomap) { /* * If the memory is already reserved (by another region), we @@ -489,8 +557,14 @@ static int __init early_init_dt_reserve_memory(phys_addr_t base, memblock_is_region_reserved(base, size)) return -EBUSY; + /* IAMROOT20 20240330 + * NOMAP flag 설정하고 reserve 등록안하고 종료 + */ return memblock_mark_nomap(base, size); } + /* IAMROOT20 20240330 + * nomap이 아닐 경우 memblock.reserved에 등록 + */ return memblock_reserve(base, size); } @@ -507,6 +581,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, int first = 1; bool nomap; + /* IAMROOT20 20240330 + * reg 프로퍼티 가져옴 + */ prop = of_get_flat_dt_prop(node, "reg", &len); if (!prop) return -ENOENT; @@ -517,12 +594,22 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, return -EINVAL; } + /* IAMROOT20 20240330 + * no-map 프로퍼티 가져옴 + */ nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; while (len >= t_len) { + /* IAMROOT20 20240330 + 
* reg 프로퍼티에서 base와 size값을 읽음 + */ base = dt_mem_next_cell(dt_root_addr_cells, &prop); size = dt_mem_next_cell(dt_root_size_cells, &prop); + /* IAMROOT20 20240330 + * nomap일 경우 nomap flag만 설정 + * nomap이 아닐 경우 memblock.reserved에 등록 + */ if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", @@ -533,6 +620,13 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, len -= t_len; if (first) { + /* IAMROOT20 20240330 + * reserved_mem에 reg의 첫번째 region 정보만 저장 + */ + /* IAMROOT20_END 20240330 */ + /* IAMROOT20_START 20240406 + * reserved memory 영역을 reserved_mem[64]에 저장한다. + */ fdt_reserved_mem_save_node(node, uname, base, size); first = 0; } @@ -545,6 +639,11 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, * in /reserved-memory matches the values supported by the current implementation, * also check if ranges property has been provided */ +/* IAMROOT20 20240330 + * reserved-memory노드에 size-cells, address-cells, ranges 속성이 있는지 확인 + * 현재 node의 size-cells 값과 dt_root_size_cells 값이 같은지 확인 + * 현재 node의 address-cells 값과 dt_root_addr_cells 값이 같은지 확인 + */ static int __init __reserved_mem_check_root(unsigned long node) { const __be32 *prop; @@ -571,6 +670,9 @@ static int __init fdt_scan_reserved_mem(void) int node, child; const void *fdt = initial_boot_params; + /* IAMROOT20 20240330 + * reserved-memory 노드를 가리킴 + */ node = fdt_path_offset(fdt, "/reserved-memory"); if (node < 0) return -ENODEV; @@ -580,16 +682,41 @@ static int __init fdt_scan_reserved_mem(void) return -EINVAL; } + /* IAMROOT20 20240330 + * reserved-memory노드의 sub-node들을 순회 + */ fdt_for_each_subnode(child, fdt, node) { const char *uname; int err; if (!of_fdt_device_is_available(fdt, child)) continue; - + /* IAMROOT20 20240330 + * ex) hyp_mem: hyp@80000000 { + * reg = <0x0 0x80000000 0x0 0x600000>; + * no-map; + * }; + * uname = "hyp@80000000" (label "hyp_mem"은 dtb에 저장되지 않음) + */ uname = fdt_get_name(fdt, child, NULL); err = 
__reserved_mem_reserve_reg(child, uname); + /* IAMROOT20 20240406 + * reg property가 없는 경우 -ENOENT를 return + * - size property가 있는 경우에는 reserved_mem[64]에 base와size를 0으로 해서 저장 + * ex) reserved-memory { + * #address-cells = <1>; + * #size-cells = <1>; + * ranges; + * + * linux,cma@80000000 { + * compatible = "shared-dma-pool"; + * alloc-ranges = <0x80000000 0x30000000>; + * size = <0x10000000>; + * linux,cma-default; + * reusable; + * }; + */ if (err == -ENOENT && of_get_flat_dt_prop(child, "size", NULL)) fdt_reserved_mem_save_node(child, uname, 0, 0); } @@ -640,6 +767,10 @@ void __init early_init_fdt_scan_reserved_mem(void) /* Process header /memreserve/ fields */ for (n = 0; ; n++) { + /* IAMROOT20 20240406 + * initial_boot_params(fdt 가상주소)에서 memory reservation block 영역의 + * base, size를 읽어와서 size가 있는 경우 memblock.reserved에 저장 + */ fdt_get_mem_rsv(initial_boot_params, n, &base, &size); if (!size) break; @@ -1071,6 +1202,7 @@ int __init early_init_dt_scan_root(void) dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT; + /* IAMROOT20_END 20240224 */ prop = of_get_flat_dt_prop(node, "#size-cells", NULL); if (prop) dt_root_size_cells = be32_to_cpup(prop); @@ -1109,7 +1241,10 @@ int __init early_init_dt_scan_memory(void) /* We are scanning "memory" nodes only */ if (type == NULL || strcmp(type, "memory") != 0) continue; - + + /* IAMROOT20_20240302 START + * status property가 없거나 "ok","okay" 인지 확인 + */ if (!of_fdt_device_is_available(fdt, node)) continue; @@ -1227,10 +1362,17 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) return; } + /* IAMROOT20_20240302 + * base를 PAGE_SIZE 만큼 올림하고, + * size는 (PAGE_SIZE - base offset)만큼 줄인다 + */ if (!PAGE_ALIGNED(base)) { size -= PAGE_SIZE - (base & ~PAGE_MASK); base = PAGE_ALIGN(base); } + /* IAMROOT20_20240302 + * size를 PAGE_SIZE 만큼 내림한다 + */ size &= PAGE_MASK; if (base > MAX_MEMBLOCK_ADDR) { @@ -1281,6 +1423,9 @@ bool __init early_init_dt_verify(void *params) 
/* Setup flat device-tree pointer */ initial_boot_params = params; + /* IAMROOT20 20240224 + * crc32 알고리즘을 사용하여 이후 fdt에 변경 사항이 생겼는지를 확인한다. + */ of_fdt_crc32 = crc32_be(~0, initial_boot_params, fdt_totalsize(initial_boot_params)); return true; @@ -1301,7 +1446,8 @@ void __init early_init_dt_scan_nodes(void) /* Setup memory, calling early_init_dt_add_memory_arch */ early_init_dt_scan_memory(); - + + /* IAMROOT20_20240316 START */ /* Handle linux,usable-memory-range property */ early_init_dt_check_for_usable_mem_range(); } @@ -1330,7 +1476,9 @@ void __init unflatten_device_tree(void) { __unflatten_device_tree(initial_boot_params, NULL, &of_root, early_init_dt_alloc_memory_arch, false); + /* IAMROOT20_END 20240518 */ + /* IAMROOT20_START 20240525 */ /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */ of_alias_scan(early_init_dt_alloc_memory_arch); diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 5949829a1b001..5484ef02b26b6 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -20,13 +20,30 @@ * Even though we connect cpus to numa domains later in SMP * init, we need to know the node ids now for all cpus. */ +/* IAMROOT20 20240608 + * in : device tree cpu node + * out : 분석된 numa-id를 numa_nodes_parsed 비트맵에 셋팅. 
+ */ static void __init of_numa_parse_cpu_nodes(void) { u32 nid; int r; struct device_node *np; + /* IAMROOT20 20240525 + * hisilicon/hip07.dtsi 참고 + */ for_each_of_cpu_node(np) { + /* IAMROOT20 20240525 + * ex) cpu1: cpu@10001 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10001>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * ------> numa-node-id = <0>; + * }; + */ r = of_property_read_u32(np, "numa-node-id", &nid); if (r) continue; @@ -35,10 +52,17 @@ static void __init of_numa_parse_cpu_nodes(void) if (nid >= MAX_NUMNODES) pr_warn("Node id %u exceeds maximum value\n", nid); else + /* IAMROOT20 20240608 + * numa_nodes_parsed의 bits의 nid번째 비트 필드를 1로 설정 + */ node_set(nid, numa_nodes_parsed); } } +/* IAMROOT20 20240608 + * in : device tree memory node + * out : memblock memory 영역 별로 numa-id 설정 + */ static int __init of_numa_parse_memory_nodes(void) { struct device_node *np = NULL; @@ -46,6 +70,15 @@ static int __init of_numa_parse_memory_nodes(void) u32 nid; int i, r; + /* IAMROOT20 20240525 + * ex) memory@0 { + * device_type = "memory"; + * reg = <0x0 0x00000000 0x0 0x40000000>; + * numa-node-id = <0>; + * }; + * device_type = "memory"인 모든 노드를 순회 + * - 'numa-node-id' property 값을 nid에 저장 + */ for_each_node_by_type(np, "memory") { r = of_property_read_u32(np, "numa-node-id", &nid); if (r == -EINVAL) @@ -61,6 +94,12 @@ static int __init of_numa_parse_memory_nodes(void) r = -EINVAL; } + /* IAMROOT20_END 20240525 */ + /* IAMROOT20 20240615 + * 매핑된 디바이스의 주소와 정보를 resource 구조체에 넣고, + * 기존에 있던 memblock 영역에서, start에서 end영역의 + * node id를 nid로 설정한다. 
+ */ for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); @@ -76,6 +115,17 @@ static int __init of_numa_parse_memory_nodes(void) static int __init of_numa_parse_distance_map_v1(struct device_node *map) { + /* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + */ const __be32 *matrix; int entry_count; int i; @@ -128,6 +178,17 @@ static int __init of_numa_parse_distance_map(void) int ret = 0; struct device_node *np; + /* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + */ np = of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); if (np) @@ -179,6 +240,12 @@ int __init of_numa_init(void) int r; of_numa_parse_cpu_nodes(); + /* IAMROOT20_START 20240608 */ + /* IAMROOT20 20240615 + * memory 영역의 디바이스 노드를 파싱하여 + * 해당 영역의 memblock에 nid를 설정하고, + * numa_distance 배열을 distance_map의 내용으로 초기화한다. + */ r = of_numa_parse_memory_nodes(); if (r) return r; diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 948efa9f99e3b..f3fd477a5a3cf 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -37,8 +37,15 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, phys_addr_t base; int err = 0; + /* IAMROOT20 20240406 + * MEMBLOCK_ALLOC_ANYWHERE = 0xffff_ffff_ffff_ffff + * SMP_CACHE_BYTES = (1 << 6) = 64 + */ end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; align = !align ? 
SMP_CACHE_BYTES : align; + /* IAMROOT20 20240406 + * base = 할당된 메모리 영역의 시작 주소 + */ base = memblock_phys_alloc_range(size, align, start, end); if (!base) return -ENOMEM; @@ -119,8 +126,15 @@ static int __init __reserved_mem_alloc_size(unsigned long node, && of_flat_dt_is_compatible(node, "shared-dma-pool") && of_get_flat_dt_prop(node, "reusable", NULL) && !nomap) + /* IAMROOT20 20240406 + * ex) CMA_MIN_ALIGNMENT_BYTES 2M (VA_BITS:39, page size:4KB) + */ align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); + /* IAMROOT20_START 20240413 + * "alloc-ranges" property가 있으면, start ~ end 사이에서 memory 할당 + * 없으면, 전체 영역(0 ~ 0xffff_..._ffff)에서 memory 할당 + */ prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); if (prop) { @@ -133,10 +147,18 @@ static int __init __reserved_mem_alloc_size(unsigned long node, base = 0; while (len > 0) { + /* IAMROOT20 20240413 + * alloc-ranges에 base, size가 여러개가 명시되엉 있는 경우 + * 순서대로 할당을 시도하고, 성공하면 break로 빠져나감 + */ start = dt_mem_next_cell(dt_root_addr_cells, &prop); end = start + dt_mem_next_cell(dt_root_size_cells, &prop); + /* IAMROOT20 20240406 + * start ~ end 사이에서 size 만큼reserved memory 영역을 할당한다 + * - base에 할당받은 영역의 시작 주소가 저장된다 + */ ret = early_init_dt_alloc_reserved_memory_arch(size, align, start, end, nomap, &base); if (ret == 0) { @@ -145,6 +167,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, (unsigned long)(size / SZ_1M)); break; } + /* IAMROOT20 20240413 + * alloc-ranges에 base, size가 여러개가 명시되어 있는 경우 + * 다음 range에서 할당하기 위해서 len -= t_len을 수행 + * ex) alloc-ranges = <0x80000000 0x30000000 + * 0x90000000 0x30000000>; + */ len -= t_len; } @@ -180,6 +208,11 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem) const struct of_device_id *i; int ret = -ENOENT; + /* IAMROOT20 20240413 + * __reservedmem_of_table은 RESERVEDMEM_OF_DECLARE 매크로로 of_device_id 구조체를 등록함 + * ex) RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table", + * tegra210_emc_table_init); + */ for (i = 
__reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { reservedmem_of_init_fn initfn = i->data; const char *compat = i->compatible; @@ -227,6 +260,9 @@ static void __init __rmem_check_for_overlap(void) if (reserved_mem_count < 2) return; + /* IAMROOT20 20240406 + * overlap을 확인하기 전에 reserved_mem의 base, size 순으로 정렬한다 + */ sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), __rmem_cmp, NULL); for (i = 0; i < reserved_mem_count - 1; i++) { @@ -234,7 +270,10 @@ static void __init __rmem_check_for_overlap(void) this = &reserved_mem[i]; next = &reserved_mem[i + 1]; - + + /* IAMROOT20 20240406 + * this영역과 next 영역이 overlap하는 경우 error log 출력 + */ if (this->base + this->size > next->base) { phys_addr_t this_end, next_end; diff --git a/drivers/of/property.c b/drivers/of/property.c index ddc75cd50825e..6e7b3ae702ecf 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -519,6 +519,10 @@ int of_property_read_string_helper(const struct device_node *np, l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; + /* IAMROOT20 20240608 + * out_strs에 문자열 p의 주소를 복사 + * - 문자열을 복사하는게 아님 + */ if (out_strs && i >= skip) *out_strs++ = p; } diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 2191c01365317..1c003d0400ab9 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -3405,6 +3405,11 @@ static void * __init dt_alloc_memory(u64 size, u64 align) * * Have to stop before resolving phandles, because that uses kmalloc. */ +/* IAMROOT20 20240525 + * dtb overlay unittest를 위한 dt tree를 생성하여 &overlay_base_root에 저장 + * - overlay에 대한 dtbo는 scripts/Makefile.lib에서 생성 + * -> 정확하게 어떤 부분에서 생성하는 지는 모름... 
+ */ void __init unittest_unflatten_overlay_base(void) { struct overlay_info *info; diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index a5fbb6ed38aed..2b80465b0f3ff 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -36,6 +36,11 @@ static struct earlycon_device early_console_dev = { .con = &early_con, }; +/* IAMROOT20_20240323 + * boot command line 파라티머로 들어온 physical address를 + * FIX_EARLYCON_MEM_BASE 에 매핑한다. + * FIX_EARLYCON_MEM_BASE virtual address + page offset을 반환한다 + */ static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size) { void __iomem *base; @@ -60,6 +65,11 @@ static void __init earlycon_init(struct earlycon_device *device, size_t len; /* scan backwards from end of string for first non-numeral */ + /* IAMROOT20_20240223 + * ex) name = "pl011" + * - earlycon->index = 011 + * - earlycon->name = "pl" + */ for (s = name + strlen(name); s > name && s[-1] >= '0' && s[-1] <= '9'; s--) @@ -96,6 +106,12 @@ static int __init parse_options(struct earlycon_device *device, char *options) int length; resource_size_t addr; + /* IAROOT20_20240223 + * ex) options = "0x3f201000,115200n8" + * - iotype = UPIO_MEM + * - addr = 0x3f201000 + * - options = "115200n8" + */ if (uart_parse_earlycon(options, &port->iotype, &addr, &options)) return -EINVAL; @@ -147,6 +163,10 @@ static int __init register_earlycon(char *buf, const struct earlycon_id *match) spin_lock_init(&port->lock); if (!port->uartclk) port->uartclk = BASE_BAUD * 16; + /* IAMROOT20_20240223 + * port->mapbase를 FIX_EARLYCON_MEM_BASE에 매핑하고, + * virtual address를 다시 저장 + */ if (port->mapbase) port->membase = earlycon_map(port->mapbase, 64); @@ -192,6 +212,15 @@ int __init setup_earlycon(char *buf) return -EALREADY; again: + /* IAMROOT20_20240323 + * ex) rpi2 early console : "earlycon=pl011,0x3f201000,115200n8" + * OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); + * + * struct earlycon_id + * - name = "pl011" + * - 
compatible = "arm,pl011" + * - setup = pl011_early_console_setup + */ for (match = __earlycon_table; match < __earlycon_table_end; match++) { size_t len = strlen(match->name); @@ -201,7 +230,13 @@ int __init setup_earlycon(char *buf) /* prefer entries with empty compatible */ if (empty_compatible && *match->compatible) continue; - + /* IAMROOT20_20240323 + * ex) buf = "pl011,0x3f201000,115200n8" + * buf[len] -> ','를 가리킴 + * + * buf += len + 1; 이후에 + * buf = "0x3f201000,115200n8" + */ if (buf[len]) { if (buf[len] != ',') continue; @@ -227,6 +262,9 @@ int __init setup_earlycon(char *buf) bool earlycon_acpi_spcr_enable __initdata; /* early_param wrapper for setup_earlycon() */ +/* IAMROOT20_20240323 + * ex) buf = "ttyS0,115200n8" + */ static int __init param_setup_earlycon(char *buf) { int err; diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index 03f721a8a2b19..c5e2ed1563161 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -14,6 +14,24 @@ static __always_inline unsigned long __fls(unsigned long word) { int num = BITS_PER_LONG - 1; +/* IAMROOT20 20240629 + * 값이 있는 비트가 나올 때까지 절반씩 범위를 줄여가며 찾는다. 
+ * word = 0x0000_0000_0000_0003 + * - (word & (0xffff_ffff_0000_0000)), num = 31, word = 0x0000_0003_0000_0000 + * - (word & (0xffff_0000_0000_0000)), num = 15, word = 0x0003_0000_0000_0000 + * - (word & (0xff00_0000_0000_0000)), num = 7, word = 0x0300_0000_0000_0000 + * - (word & (0xf000_0000_0000_0000)), num = 3, word = 0x3000_0000_0000_0000 + * - (word & (0xC000_0000_0000_0000)), num = 1, word = 0xC000_0000_0000_0000 + * - (word & (0x8000_0000_0000_0000)), num = 1 + * + * word = 0x0000_0000_0000_0005 + * - (word & (0xffff_ffff_0000_0000)), num = 31, word = 0x0000_0005_0000_0000 + * - (word & (0xffff_0000_0000_0000)), num = 15, word = 0x0005_0000_0000_0000 + * - (word & (0xff00_0000_0000_0000)), num = 7, word = 0x0500_0000_0000_0000 + * - (word & (0xf000_0000_0000_0000)), num = 3, word = 0x5000_0000_0000_0000 + * - (word & (0xC000_0000_0000_0000)), num = 3, word = 0x5000_0000_0000_0000 + * - (word & (0x8000_0000_0000_0000)), num = 2 + */ #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { num -= 32; diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 71ab4ba9c25d1..fbbadb264495d 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -15,6 +15,10 @@ static __always_inline void arch_set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); + /* IAMROOT20 20240608 + * nr번째 비트 필드의 값을 1로 세팅한다 + * *p |= BIT_MASK(nr) + */ arch_atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } @@ -36,10 +40,30 @@ static __always_inline int arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) { long old; + /* IAMROOT20 20240127 + * BIT_WORD() : 몇 번째 word(long)을 쓸 것인지 + * BIT_MASK() : word 내에서 해당 bit를 set + */ unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); + /* IAMROOT20 20240127 + * ex) + * 1) old값에 mask bit가 set되어 있지 않은 경우 + * old = 0001 + * mask = 0010 + * + * old & mask = 0000 + * !!(old & mask) -> false + * + 
* 2) old값에 mask bit가 set되어 있는 경우 + * old = 0011 + * mask = 0010 + * + * old & mask = 0010 + * !!(old & mask) -> true + */ return !!(old & mask); } diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 564a8c675d858..29dbf02cb0fe4 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -125,6 +125,13 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) * so `volatile` must always stay here with no cast-aways. See * `Documentation/atomic_bitops.txt` for the details. */ + /* IAMROOT20 20240706 + * exam) nr = 3 (BIT_WORD(3) = 0) + * 0x0000_0000_0000_0001 & (addr[0] >> (0x0000_0000_0000_0003 & 0x0000_0000_0000_003f)) + * 0x0000_0000_0000_0001 & (addr[0] >> 3) + * addr[0]의 0 ~ 2번 비트는 shift로 버림 + * addr[0]의 3번 비트가 1로 되어있는지 확인 + */ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index 4225a8ca9c1a0..fafa7a5d371a4 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -67,6 +67,10 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { + /* IAMROOT20 20240127 + * KCSAN - Kernel Concurrency Sanitizer + * - 커널 공간에서 동작하는 동적 data race 탐지 기능 + */ kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_set_bit(nr, addr); diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc71..42cc6bf037af3 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -18,6 +18,14 @@ #include #include +/* IAMROOT20 20231216 + * FIX_PTE 971 + * 0xfffffbfffdc35000 = 0xffff_fbff_fe00_0000 - (971 << PAGE_SHIFT) + * FIX_PMD 972 + * 0xfffffbfffdc34000 = 0xffff_fbff_fe00_0000 - (972 << 
PAGE_SHIFT) + * FIX_PUD 973 + * 0xfffffbfffdc33000 = 0xffff_fbff_fe00_0000 - (973 << PAGE_SHIFT) + */ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) @@ -71,6 +79,10 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #endif /* Return a pointer with offset calculated */ +/* IAMROOT20 20231209 + * idx에 해당하는 fixmap(bm_pte[][])에 (phys | flags)를 write하고 + * idx에 해당하는 (virtual 주소 + offset)를 return + */ #define __set_fixmap_offset(idx, phys, flags) \ ({ \ unsigned long ________addr; \ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 6432a7fade913..ad3d7282b3a16 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,6 +41,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * translations for raw_cpu_ptr(). */ #ifndef arch_raw_cpu_ptr +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) __my_cpu_offset(0) + &cpu_number + 0 + */ #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da9e5629ea43d..19a7fe636dd97 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -304,6 +304,13 @@ . = ALIGN(8); \ BOUNDED_SECTION_PRE_LABEL(.lsm_info.init, _lsm_info, __start, __end) +/* IAMROOT20 20240203 + * EARLY_LSM_TABLE() + * . = ALIGN(8); + * __start_early_lsm_info=.; + * KEEP(*(.early_lsm_info.init)) + * __end_early_lsm_info=.; + */ #define EARLY_LSM_TABLE() \ . 
= ALIGN(8); \ BOUNDED_SECTION_PRE_LABEL(.early_lsm_info.init, _early_lsm_info, __start, __end) diff --git a/include/linux/align.h b/include/linux/align.h index 2b4acec7b95a2..d3456c152675a 100644 --- a/include/linux/align.h +++ b/include/linux/align.h @@ -5,6 +5,11 @@ #include /* @a is a power of 2 value */ +/* IAMROOT20 20240518 + * ALIGN(14, 8) + * -> (14+7) & ~7 = 21 & 0xffff_..._fff8 + * -> 16 + */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a6e4437c5f369..19993804fb86d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -2162,6 +2162,10 @@ arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); + /* IARMROOT24 20240127 + * arch_atomic64_fetch_or_relaxed(i, v) + * => *v |= i, 이전 v값을 return한다 + */ ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7d6d73b781472..32799f1e1cc30 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -234,7 +234,42 @@ extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count); +/* IAMROOT20 20240525 + * exam) start = 1 --> 0xffff_ffff_ffff_fffe + * exam) start = 2 --> 0xffff_ffff_ffff_fffc + * exam) start = 3 --> 0xffff_ffff_ffff_fff8 + * exam) start = 4 --> 0xffff_ffff_ffff_fff0 + * exam) start = 16 --> 0xffff_ffff_ffff_0000 + */ #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +/* IAMROOT20 20240525 + * BITMAP_LAST_WORD_MASK(nbits) : 0번째 bit 부터 (nbits-1) 번째 bit 까지 1로 set + * 0b 0000...000111...111 + * ^--- 0 번째 + * ^--- (nbits - 1) 번째 + * + * 
exam) nbits = 16 + * ~0UL >> (-16 & 63) + * = ~0UL >> (0xffff_ffff_ffff_fff0 & 0x3f) + * = ~0UL >> 0x30 + * = ~0UL >> 48 + * = 0x0000_0000_0000_ffff + * + * exam) nbits = 64 + * ~0UL >> (-64 & 63) + * = ~0UL >> (0xffff_ffff_ffff_ffC0 & 0x3f) + * = ~0UL >> 0x0 + * = 0x_ffff_ffff_ffff_ffff + * + * exam) nbits = 1 --> 0x0000_0000_0000_0001 + * exam) nbits = 2 --> 0x0000_0000_0000_0003 + * exam) nbits = 3 --> 0x0000_0000_0000_0007 + * exam) nbits = 17 --> 0x0000_0000_0001_ffff + * exam) nbits = 30 --> 0x0000_0000_3fff_ffff + * exam) nbits = 31 --> 0x0000_0000_7fff_ffff + * exam) nbits = 64 --> 0xffff_ffff_ffff_ffff + * exam) nbits = 65 --> 0x0000_0000_0000_0001 + */ #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2ba557e067fe6..025a2d017645e 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -15,7 +15,19 @@ # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) #endif +/* IAMROOT20 20240127 + * ex) type(long) + * (sizeof(long) * BITS_PER_BYTE) + * (8 * 8) + * (64) + */ #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +/* IAMROOT20 20240127 + * ex) nr(256) + * __KERNEL_DIV_ROUND_UP(256, BITS_PER_TYPE(long)) + * __KERNEL_DIV_ROUND_UP(256, 64) + * (4) + */ #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe8..c7f17aa27b9c9 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -33,12 +33,22 @@ #define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +/* IAMROOT20 20240427 + * ex) GENMASK(16, 4) 0x0000_0000_0001_fff0 + * GENMASK(15, 0) 0x0000_0000_0000_ffff + * GENMASK(47, 0) 
0x0000_ffff_ffff_ffff + */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) #define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +/* IAMROOT20 20231216 + * 64bit 값중 lbit부터 h비트값이 1인 마스크값을 만든다. + * exam) GENMASK( 7, 4) => 0x0000_0000_00f0 + * GENMASK(11, 8) => 0x0000_0000_0f00 + */ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a0d5466c7be1..df3922f6cc070 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -42,6 +42,11 @@ struct poll_table_struct; #define SUBSYS(_x) _x ## _cgrp_id, enum cgroup_subsys_id { #include + /* IAMROOT20 20240113 + * SUBSYS(cpuset) --> cpuset_cgrp_id, + * SUBSYS(cpu) --> cpu_cgrp_id, + * SUBSYS(memory) --> memory_cgrp_id, + */ CGROUP_SUBSYS_COUNT, }; #undef SUBSYS @@ -241,6 +246,11 @@ struct css_set { * css_set_rwsem, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ + /* IAMROOT20 20240120 + * 이 cgroup 그룹을 사용하여 실행 중인 모든 작업을 나열합니다. + * mg_tasks는 이 cset에 속하지만 마이그레이션되거나 마이그레이션되는 과정에 있는 작업을 나열합니다. + * css_set_rwsem으로 보호되지만 마이그레이션 중에 작업이 mg_tasks로 이동되면 cgroup_mutex를 유지하는 동안 안전하게 읽을 수 있습니다. + */ struct list_head tasks; struct list_head mg_tasks; struct list_head dying_tasks; diff --git a/include/linux/cma.h b/include/linux/cma.h index 63873b93deaa6..32d3ae3250137 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,6 +21,10 @@ * -- can deal with only some pageblocks of a higher-order page being * MIGRATE_CMA, we can use pageblock_nr_pages. 
*/ +/* IAMROOT20 20240416 + * CMA_MIN_ALIGNMENT_PAGES 512 (pageblock_nr_pages = 1 << 9) + * CMA_MIN_ALIGNMENT_BYTES SZ_2M (4K * 512) + */ #define CMA_MIN_ALIGNMENT_PAGES pageblock_nr_pages #define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24fc..ae759bd227e4e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -163,6 +163,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #ifndef RELOC_HIDE +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) off(0) + * (typeof(&cpu_number)) (&cpu_number + (0)) + */ # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e659cb6fded39..33d76d55dc372 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -140,6 +140,10 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute */ #if __has_attribute(__externally_visible__) +/* IAMROOT20 20240106 + * __externally_visible__ : 컴파일러에게 이 함수 또는 변수가 현재 컴파일 단위 밖에서도 + * 참조된다고 알려, 사용되지 않는 것으로 보고 제거하지 않도록 합니다 + */ # define __visible __attribute__((__externally_visible__)) #else # define __visible diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806eb..771786bc064f3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -16,6 +16,16 @@ #endif /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ +/* IAMROOT20 20240120 + * sparse 설명 : + * https://kldp.org/node/96789 + * https://pinocc.tistory.com/144 + * https://www.kernel.org/doc/Documentation/dev-tools/sparse.rst + * https://en.wikipedia.org/wiki/Sparse + * https://sparse.docs.kernel.org/en/latest/annotations.html + * + * __force는 sparse 속성이 없더라도 경고를 내지 않는다.
+ */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 713890c867bea..45f849a931f27 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -15,6 +15,9 @@ * * WARNING: any const qualifier of @ptr is lost. */ +/* IAMROOT20 20240120 + * structure에 있는 member를 가지고 structure주소를 알아낸다. + */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ static_assert(__same_type(*(ptr), ((type *)0)->member) || \ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ca736b05ec7b0..a2aea753d229b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -16,6 +16,12 @@ #include /* Don't assign or return these: may not be this big! */ +/* IAMROOT20 20240127 + * __cpu_possible_mask, __cpu_online_mask, __cpu_present_mask, __cpu_active_mask는 모두 struct cpumask 형으로 선언되어 있음 + * typedef struct cpumask { + * unsigned long bits[4]; + * } cpumask_t; + */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** @@ -74,6 +80,17 @@ static inline void set_nr_cpu_ids(unsigned int nr) * optimization comes from being able to potentially use a compile-time * constant instead of a run-time generated exact number of CPUs. 
*/ +/* IAMROOT20 20240127 + * optimization을 위해 NR_CPUS 값에 따라 small/large_cpumask_bits를 설정 + * ex) BITS_PER_LONG = 64 + * 1) NR_CPUS <= 64 + * small, large -> NR_CPUS + * 2) 64 < NR_CPUS <= 4*64(256) + * small -> nr_cpu_ids + * large -> NR_CPUS + * 3) 256 < NR_CPUS + * small, large -> nr_cpu_ids + */ #if NR_CPUS <= BITS_PER_LONG #define small_cpumask_bits ((unsigned int)NR_CPUS) #define large_cpumask_bits ((unsigned int)NR_CPUS) @@ -515,6 +532,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { + /* IAMROOT20 20240127 + * test_and_set_bit(cpu, cpumask->bits) + */ return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } diff --git a/include/linux/err.h b/include/linux/err.h index a139c64aef2ac..d79168a4a2980 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -15,10 +15,28 @@ * This should be a per-architecture thing, to allow different * error and pointer decisions. */ +/* IAMROOT20 20240120 + * 커널 포인터에는 중복된(여유) 정보가 있으므로, 같은 반환 값으로 오류 코드 또는 일반 + * 포인터 중 하나를 반환하는 체계를 사용할 수 있습니다. + * 이는 다양한 오류 및 포인터 결정을 허용하기 위해 아키텍처별로 이루어져야 합니다. + */ #define MAX_ERRNO 4095 #ifndef __ASSEMBLY__ +/* IAMROOT20 20240120 + * x >= (unsigned long)-4095 + * -> x >= 0xffff_ffff_ffff_f001 + * 커널영역주소는 0xffff_0000_0000_0000 ~ 0xffff_ffff_ffff_ffff + * -1(0xffff_ffff_ffff_ffff) ~ -4095(0xffff_ffff_ffff_f001)가 에러, 그 외에는 주소 + * + * 에러 번호는 1 ~ 34까지 할당되어 있으며 ERR_PTR함수를 호출할때 -를 붙여 호출한다 + * exam) ERR_PTR(-ENOMEM) + * 따라서 에러 번호는 -1 ~ -34까지 해당되며 unsigned long으로 바꾸면 + * 0xffff_ffff_ffff_ffff(-1) ~ 0xffff_ffff_ffff_ffde(-34)에 해당된다.
+ * + * include/uapi/asm-generic/errno-base.h 참고 + */ #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9e..69146b5bf82b6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -8,6 +8,10 @@ * Small id to pointer translation service avoiding fixed sized * tables. */ +/* IAMROOT20 20240120 + * IDA, IDR 한글 설명. + * https://velog.io/@mythos/Linux-Tutorial-23-IDRID-Radix-IDAID-Allocator + */ #ifndef __IDR_H__ #define __IDR_H__ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa71399e4..0a738052d2f8e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -170,6 +170,9 @@ extern void warn_bogus_irq_restore(void); /* * Wrap the arch provided IRQ routines to provide appropriate checks. */ +/* IAMROOT20 20240120 + raw_local_irq_disable은 arch_local_irq_disable로 치환됨 + */ #define raw_local_irq_disable() arch_local_irq_disable() #define raw_local_irq_enable() arch_local_irq_enable() #define raw_local_irq_save(flags) \ @@ -240,6 +243,11 @@ extern void warn_bogus_irq_restore(void); #else /* !CONFIG_TRACE_IRQFLAGS */ #define local_irq_enable() do { raw_local_irq_enable(); } while (0) +/* IAMROOT20 20240120 + local_irq_disable은 raw_local_irq_disable로 치환됨 + local: 현재 PE를 의미함 + remote: 현재 PE를 제외한 나머지를 의미함 + */ #define local_irq_disable() do { raw_local_irq_disable(); } while (0) #define local_irq_save(flags) do { raw_local_irq_save(flags); } while (0) #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ab2b2fafa4a45..0b9e6079030f5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -29,12 +29,29 @@ #include #include +/* IAMROOT20 20240203 + * union security_list_options { + * int (*binder_set_context_mgr)(const struct cred *mgr); + * int 
(*binder_transaction)(const struct cred *from,const struct cred *to); + * ... + * }; + */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK }; +/* IAMROOT20 20240203 + * struct security_hook_heads { + * struct hlist_head binder_set_context_mgr; + * struct hlist_head binder_transaction; + * .... + * } __attribute__((__designated_init__)) __attribute__((randomize_layout)); + * __attribute__((__designated_init__)): 지정된 초기화 사용 + * __attribute__((randomize_layout)): 구조체 필드 배치 무작위화 + * 구조체 필드 위치가 랜덤하게 결정되면 초기화시 의도했던대로 안되기 때문에 지정된 초기화 옵션을 사용하는 것으로 보임 + */ struct security_hook_heads { #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; #include "lsm_hook_defs.h" diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f82ee3fac1cdf..d371ea97e906d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -200,6 +200,9 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ +/* IAMROOT20 20240406 + * ULLONG_MAX가 반환될 때까지 역순으로 순회하며, 가장 첫 번째 reserved되지 않은 영역으로 설정 + */ #define __for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ @@ -340,6 +343,9 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); * Walks over free (memory && !reserved) areas of memblock. Available as * soon as memblock is initialized. 
*/ +/* IAMROOT20 20240309 + * 루프를 돌며 memory 영역에서 reserved 영역을 제외한 영역인 free 메모리 영역을 알아옴 + */ #define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \ __for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ nid, flags, p_start, p_end, p_nid) diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 396df1121bffb..c1293b90e44cc 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -108,6 +108,14 @@ * This macro does strict typechecking of @lo/@hi to make sure they are of the * same type as @val. See the unnecessary pointer comparisons. */ +/* IAMROOT20 20240824 + * clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + * - val이 [lo, hi] 영역을 벗어나면 lo로 올리거나 hi로 내림 + * - val이 [lo, hi] 사이에 있는 경우 변경 x + * ex) clamp(1, 10, 20) = 10 + * clamp(22, 10, 20) = 20 + * clamp(15, 10, 20) = 15 + */ #define clamp(val, lo, hi) __careful_clamp(val, lo, hi) /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 9e10485f37e7f..5d0abc4f1886c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -106,7 +106,16 @@ extern int mmap_rnd_compat_bits __read_mostly; #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif +/* IAMROOT20 20240427 + * lm_alias : vmalloc 영역의 커널 이미지 가상 주소(x)에서 + * 리니어 매핑 영역에 매핑되어 있는 가상 주소 반환 + */ #ifndef lm_alias +/* IAMROOT20 20240427 + * Linear Mapping alias + * PAGE_OFFSET | (x - kimage_voffset - PHYS_OFFSET) + * -> 0xffff_0000_~ | (x - kimage_voffset - memstart_addr) + */ #define lm_alias(x) __va(__pa_symbol(x)) #endif @@ -1560,6 +1569,10 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) } #endif +/* IAMROOT20 20241005 + * SPARSEMEM 이지만 VMEMMAP을 사용하지 않는 경우에만, + * page flags에 SECTION 정보를 저장 + */ #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 306a3d1a0fa65..5aa82abe6eff1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -396,6 +396,10 @@
FOLIO_MATCH(compound_head, _head_2); /* * Used for sizing the vmemmap region on some architectures */ + /* + * IAMROOT20 20231129: + * STRUCT_PAGE_MAX_SHIFT 6 + */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a4889c9d4055b..f3a3c9cac7887 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -28,6 +28,9 @@ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_ORDER 10 #else +/* IAMROOT20 20240601 + * MAX_ORDER = 10 + */ #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) @@ -1019,6 +1022,12 @@ static inline bool zone_is_empty(struct zone *zone) */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ +/* IAMROOT20 20241005 + * SECTIONS_PGOFF 64 (vmemmap 사용, vmemmap 사용 x : 43) + * NODES_PGOFF 60 + * ZONES_PGOFF 58 + * LAST_CPUPID_PGOFF 42 + */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) @@ -1051,6 +1060,17 @@ static inline bool zone_is_empty(struct zone *zone) #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) +/* IAMROOT20 20241005 + * ZONES_WIDTH 2 + * NODES_WIDTH 4 + * SECTIONS_WIDTH 0 (vmemmap 사용, vmemmap 사용 x : 21) + * LAST_CPUPID_SHIFT 16 + * + * ZONES_MASK 0x3 + * NODES_MASK 0xF + * SECTIONS_MASK 0x0 (vmemmap 사용, vmemmap 사용 x : 0x1F_FFFF) + * LAST_CPUPID_MASK 0xFFFF + */ #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) @@ -1720,14 +1740,38 @@ static inline bool movable_only_nodes(nodemask_t *nodes) * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ +/* IAMROOT20 20240809 + * PA_SECTION_SHIFT 27 + * PFN_SECTION_SHIFT 15 = 27 - 12 + */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define
PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) +/* IAMROOT20 20240727 + * SECTIONS_SHIFT = MAX_PHYSMEM_BITS - SECTION_SIZE_BITS + * = 48 - 27 + * 전체 PA에서 가질 수 있는 섹션의 개수 + */ +/* IAMROOT20 20240809 + * NR_MEM_SECTIONS SIZE_2M + */ #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) +/* IAMROOT20 20240720 + * ex) 4K인 경우, + * PAGES_PER_SECTION 0x8000 (1 << PFN_SECTION_SHIFT) = (1 << 15) + * PAGE_SECTION_MASK 0xFFFF_FFFF_FFFF_8000 (~0x7fff) + */ #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) +/* IAMROOT20 20240727 + * PFN_SECTION_SHIFT = 27 - 12 = 15 + * pageblock_order = 9 + * NR_PAGEBLOCK_BITS = 4 + * + * SECTION_BLOCKFLAGS_BITS = (1 << (15 - 9)) * 4 = 256 + */ #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) @@ -1750,6 +1794,11 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_SHIFT 21 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) +/* IAMROOT20 20240727 + * PFN_SUBSECTION_SHIFT = 21 - 12 = 9 + * PAGES_PER_SUBSECTION = 1 << 9 = 512 + * PAGE_SUBSECTION_MASK = 0xFFFF_FFFF_FFFF_FE00 + */ #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) @@ -1757,6 +1806,11 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else +/* IAMROOT20 20240727 + * SECTION_SIZE_BITS = 27 + * SUBSECTION_SHIFT = 21 + * SUBSECTIONS_PER_SECTION = 1 << 6 = 64 + */ #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) #endif @@ -1806,11 +1860,31 @@ struct mem_section { }; #ifdef CONFIG_SPARSEMEM_EXTREME +/* IAMROOT20 20240809 + * SECTIONS_PER_ROOT 256 + */ #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) #else #define SECTIONS_PER_ROOT 1 #endif +/* IAMROOT20 20240720 + * ex) 4K, PA=48 
인 경우 + * NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) + * = 2^21 / 2^8 = 2^13 = 8192 + * + * NR_MEM_SECTIONS (1 << SECTIONS_SHIFT) = (1 << 21) + * SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) = 21 + * MAX_PHYSMEM_BITS 48 + * SECTION_SIZE_BITS 27 + * + * SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) = 4K / 16 = 256(2^8) + */ +/* IAMROOT20 20240809 + * SECTION_NR_TO_ROOT(sec) (sec / 256) + * NR_SECTION_ROOTS 8192 + * SECTION_ROOT_MASK 0x00ff + */ #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) @@ -1826,6 +1900,9 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) return ms->usage->pageblock_flags; } +/* IAMROOT20 20240720 + * __nr_to_section() : section number를 mem_section 구조체 주소로 변환 + */ static inline struct mem_section *__nr_to_section(unsigned long nr) { unsigned long root = SECTION_NR_TO_ROOT(nr); @@ -1837,6 +1914,9 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) if (!mem_section || !mem_section[root]) return NULL; #endif + /* IAMROOT20 20240809 + * SECTION_ROOT_MASK 0x00ff + */ return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); @@ -1876,6 +1956,12 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +/* IAMROOT20 20240803 + * SECTION_MAP_LAST_BIT 4 + * BIT(SECTION_MAP_LAST_BIT) = 0b10000 = 0x10 + * + * SECTION_MAP_MASK = ~(0x10 - 1) = ~0xF = 0xFFFF_FFFF_FFFF_FFF0 + */ #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT @@ -1947,6 +2033,10 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) extern unsigned long __highest_present_section_nr; +/* IAMROOT20 20241019 + * ~(PAGE_SECTION_MASK): 0x7fff = 2^15 - 1 = PAGES_PER_SECTION - 1 + * PAGES_PER_SUBSECTION: 2^9 + */ static
inline int subsection_map_index(unsigned long pfn) { return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; @@ -1993,6 +2083,7 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; + /* IAMROOT20_END 20241012 */ /* IAMROOT20_START 20241019 */ ms = __pfn_to_section(pfn); if (!valid_section(ms)) return 0; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index bb0ee80526b2d..a4d1b0e160051 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -96,6 +96,11 @@ #include #include +/* IAMROOT20 20240525 + * MAX_NUMNODES = 1<<4 = 16 + * + * bits[BITS_TO_LONGS(MAX_NUMNODES)] = bits[1] + */ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; @@ -314,6 +319,10 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #if MAX_NUMNODES <= BITS_PER_LONG +/* IAMROOT20 20240525 + * ex) MAX_NUMNODES = 16 인 경우 + * -> NODE_MASK_ALL = {{ [0] = 0xffff }} + */ #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ @@ -458,6 +467,9 @@ extern unsigned int nr_online_nodes; static inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); + /* IAMROOT20 20240622 + * node_states[N_ONLINE] 비트 필드에 1로 설정된 비트의 개수를 반환 + */ nr_online_nodes = num_node_state(N_ONLINE); } @@ -525,7 +537,15 @@ static inline int node_random(const nodemask_t *maskp) #endif } +/* IAMROOT20 20240525 + * numa_init + * nodes_clear(node_online_map); + */ #define node_online_map node_states[N_ONLINE] +/* IAMROOT20 20240525 + * numa_init + * nodes_clear(node_possible_map); + */ #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051fa..83ecaa3f77ba5 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -9,6 +9,10 @@ #define NODES_SHIFT 0 #endif +/* IAMROOT20 20240525 + * NODES_SHIFT = 
CONFIG_NODES_SHIFT = 4 + * MAX_NUMNODES = 1<<4 = 16 + */ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d79818dc0651..069a9f68c6318 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -28,6 +28,9 @@ #ifdef CONFIG_SPARSEMEM #include +/* IAMROOT20 20240809 + * SECTIONS_SHIFT 21 + */ #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) #else #define SECTIONS_SHIFT 0 diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e83c4c0950417..8aebcd53ea71d 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,6 +41,12 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ +/* IAMROOT20 20240406 + * ex) VA_BITS : 39, page size : 4KB + * - HUGETLB_PAGE_ORDER 9 + * - MAX_ORDER 10 + * - pageblock_order (unsigned int)9 + */ #define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ @@ -52,6 +58,9 @@ extern unsigned int pageblock_order; #endif /* CONFIG_HUGETLB_PAGE */ +/* IAMROOT20 20240416 + * pageblock_nr_pages 512 + */ #define pageblock_nr_pages (1UL << pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) #define pageblock_aligned(pfn) IS_ALIGNED((pfn), pageblock_nr_pages) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index e60727be79c44..5c55a22407423 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -46,6 +46,13 @@ * linkage errors occur due the compiler generating the wrong code to access * that section. 
*/ +/* IAMROOT20 20240127 + * ex) sec("..read_mostly") + * __percpu __attribute__((section(PER_CPU_BASE_SECTION "..read_mostly"))) + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(PER_CPU_BASE_SECTION "..read_mostly"))) + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) + * PER_CPU_ATTRIBUTES는 arm64에서 define되어 있지 않음 + */ #define __PCPU_ATTRS(sec) \ __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES @@ -97,9 +104,19 @@ /* * Normal declaration and definition macros. */ +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) sec("..read_mostly") + * extern __PCPU_ATTRS("..read_mostly") __typeof__(int) cpu_number + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_ATTRS(sec) __typeof__(type) name +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) sec("..read_mostly") + * __PCPU_ATTRS("..read_mostly") __typeof__(int) cpu_number + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) __typeof__(type) name #endif @@ -165,9 +182,19 @@ /* * Declaration/definition used for per-CPU variables that must be read mostly.
*/ +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) + * DECLARE_PER_CPU_SECTION(int, cpu_number, "..read_mostly") + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) + * DEFINE_PER_CPU_SECTION(int, cpu_number, "..read_mostly") + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..read_mostly") @@ -214,6 +241,10 @@ * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array. */ +/* IAMROOT20 20240127 + * https://stackoverflow.com/questions/30831335/verify-pcpu-ptr-function-in-linux-kernel-what-does-it-do + * http://www.iamroot.org/xe/index.php?mid=Programming&document_srl=208290 + */ #define __verify_pcpu_ptr(ptr) \ do { \ const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ @@ -227,6 +258,13 @@ do { \ * to prevent the compiler from making incorrect assumptions about the * pointer value. The weird cast keeps both GCC and sparse happy. 
*/ +/* IAMROOT20 20240127 + * ex) __p(&cpu_number) __offset(0) + * RELOC_HIDE((typeof(*(&cpu_number)) __kernel __force *)(&cpu_number), (0)) + * RELOC_HIDE((typeof(*(&cpu_number)) __attribute__((address_space(0))) __attribute__((force)) *)(&cpu_number), (0)) + * 요약하면 RELOC_HIDE((int *) &cpu_number, 0) + * &cpu_number + 0 + */ #define SHIFT_PERCPU_PTR(__p, __offset) \ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) @@ -236,6 +274,10 @@ do { \ SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))); \ }) +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) + * &cpu_number + 0 + */ #define raw_cpu_ptr(ptr) \ ({ \ __verify_pcpu_ptr(ptr); \ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c5a51481bbb90..ce8d6ced054d4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -83,6 +83,11 @@ static inline unsigned long pud_index(unsigned long address) #ifndef pgd_index /* Must be a compile-time constant, so implement it as a macro */ +/* + * IAMROOT20 20231130: + * exam) VA_BITS 48, PAGE_SIZE 4k + * pgd_index(a) a >> 39 & 511 + */ #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif @@ -846,6 +851,11 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #endif #ifndef pmd_addr_end +/* IAMROOT20 20231216 + * exam) addr: 0xffff_fbff_fddf_e000 end: 0xffff_fbff_fde1_1000 + * (0xfffffbfffddfe000 + SZ_2M ) & 0xffff_ffff_ffe0_0000 + * = 0xffff_fbff_fde0_0000 + */ #define pmd_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dcd2cf1e8326d..4d32d5c4c6d78 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -429,6 +429,15 @@ static inline void rcu_preempt_sleep_check(void) { } */ #ifdef __CHECKER__ +/* IAMROOT20 20240120 + * rcu_check_sparse(p, __rcu) + * --> p에 __rcu 속성이 있는지 체크한다.
+ * + * sparse 설명 : + * https://kldp.org/node/96789 + * https://www.kernel.org/doc/Documentation/dev-tools/sparse.rst + * https://en.wikipedia.org/wiki/Sparse + */ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ @@ -482,6 +491,16 @@ static inline void rcu_preempt_sleep_check(void) { } * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. */ +/* IAMROOT20 20240120 + * __force는 v에 __rcu속성이 없더라도 타입캐스팅을 한다. + * exam) + * int a = 10; + * int * v = &a; + * int __rcu *r = RCU_INITIALIZER(v); + * -> (typeof(*v) __force __rcu *)(v) + * -> (int __force __rcu *)(v) + * --> int __rcu *r = (int __force __rcu *)v; + */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** @@ -933,6 +952,10 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler.
+ * p = RCU_INITIALIZER(v) + */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index f158b025c1750..0804142e59374 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,6 +25,9 @@ static __always_inline void *task_stack_page(const struct task_struct *task) static __always_inline unsigned long *end_of_stack(const struct task_struct *task) { + /* IAMROOT20 20240106 + * CONFIG_STACK_GROWSUP : 스택이 상향으로 push되는 경우에 사용 + */ #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; #else diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca2..a1bfff834a9b2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -261,6 +261,10 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable. */ #ifndef __smp_processor_id +/* IAMROOT20 20240127 + * ex) x(void로 추정됨) + * &cpu_number + 0 + */ #define __smp_processor_id(x) raw_smp_processor_id(x) #endif @@ -268,6 +272,9 @@ static inline int get_boot_cpu_id(void) extern unsigned int debug_smp_processor_id(void); # define smp_processor_id() debug_smp_processor_id() #else +/* IAMROOT20 20240127 + * &cpu_number + 0 + */ # define smp_processor_id() __smp_processor_id() #endif diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h index 91cb36b65a170..7c362e77614c2 100644 --- a/include/linux/spinlock_types_raw.h +++ b/include/linux/spinlock_types_raw.h @@ -11,6 +11,14 @@ #include +/* IAMROOT20 20240113 + * raw_lock : 스핀락. 
값이 0 이상인지 체크를 통해, 락이 걸려 있는지 확인 + * magic : 스핀락이 만들어질 때 설정되는 랜덤한 정수 + * owner : 어떤 프로세스에서 실행되는지에 대한 정보 + * owner_cpu : 몇 번째 CPU에서 실행되는지에 대한 정보 + * dep_map : 구조체가 현재 접근중인 lock을 lock_class에 연결 + * - https://m.blog.naver.com/nawoo/220913522363 + */ typedef struct raw_spinlock { arch_spinlock_t raw_lock; #ifdef CONFIG_DEBUG_SPINLOCK diff --git a/include/linux/string.h b/include/linux/string.h index c062c581a98b9..5a1d28ddc2c99 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -244,6 +244,12 @@ static inline void memzero_explicit(void *s, size_t count) * * @path: path to extract the filename from. */ +/* IAMROOT20 20240525 + * ex1) path = "foo/bar" + * ^---- tail, return : tail + 1 + * ex2) path = "foo" + * ^---- return + */ static inline const char *kbasename(const char *path) { const char *tail = strrchr(path, '/'); diff --git a/include/linux/types.h b/include/linux/types.h index 688fb943556a1..78c7f5388ac9a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -7,6 +7,11 @@ #ifndef __ASSEMBLY__ +/* IAMROOT20 20240127 + * ex) name(bits) bits(256) + * unsigned long bits[BITS_TO_LONGS(256)] + * unsigned long bits[4] + */ #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca50..eb677f8106911 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,9 +28,21 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +/* IAMROOT20 20240518 + * __ALIGN_KERNEL(14, 8) + * -> __ALIGN_KERNEL_MASK(14, 7) + * -> (14+7) & ~7 = 21 & 0xffff_..._fff8 + * -> 16 + */ #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +/* IAMROOT20 20240127 + * ex) n(256) d(64) + * (((256) + (64) - 1) / (64)) + * ROUND_UP(256/64) + * (4) + */ #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif /* _UAPI_LINUX_CONST_H */ diff --git
a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b1..ebbd27a8ec5ab 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -61,16 +61,47 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] * Set up the first task table, touch at your own risk!. Base=0, * limit=0x1fffff (=2MB) */ +/* IAMROOT20 20231111 + * ARM64: CONFIG_ARCH_TASK_STRUCT_ON_STACK = false + * IA64: CONFIG_ARCH_TASK_STRUCT_ON_STACK = true + * true의 경우, __init_task_data 매크로는 __section(".data..init_task")을 가리킨다. + * false의 경우, __init_task_data는 아무것도 가리키지 않는다. + */ struct task_struct init_task #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK __init_task_data #endif +/* IAMROOT20 20231111 + * 아래 구조체를 L1_CACHE_BYTES 크기로 정렬한다. + * L1_CACHE_BYTES = 1 << 6 = 64 byte + */ __aligned(L1_CACHE_BYTES) = { +/* IAMROOT20 20231111 + * CONFIG_THREAD_INFO_IN_TASK = true + * INIT_THREAD_INFO : + * 1) 현재 CPU의 FP 처리 활성 상태 + * 2) CPU의 선점 가능 여부 + * 3) shadow call stack 활성 시, shadow call stack에 대한 정보 + * REFCOUNT_INIT : 1 + */ #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), .stack_refcount = REFCOUNT_INIT(1), #endif +/* IAMROOT20 20231111 + * __state: 프로세스의 상태를 저장 + * - 0x00000000: TASK_RUNNING + * init_stack: THREAD_SHIFT로 정렬된 data 섹션의 위치 + * flags: 프로세스의 세부 실행 상태 + * - PF_KTHREAD: 커널 스레드임을 의미 + * MAX_PRIO: 우선순위 최대값 + * CPU_MASK_ALL: cpumask_t의 모든 배열의 요소마다 모든 비트가 1로 설정된 비트 필드를 설정. + * NR_CPUS: arm64에는 256으로 정의되어 있음 + * tasks: 커널에서 구동 중인 모든 프로세스 중 가장 최상위 프로세스의 태스크 + * active_mm: init_mm 구조체 설정 + * - .pgd = init_pg_dir + */ .__state = 0, .stack = init_stack, .usage = REFCOUNT_INIT(2), @@ -102,6 +133,10 @@ struct task_struct init_task #ifdef CONFIG_CGROUP_SCHED .sched_task_group = &root_task_group, #endif +/* IAMROOT20 20231111 + * INIT_TASK_COMM: "swapper" + * children, sibling: 연결리스트로, 현재 자기 자신을 next와 prev에 설정한다.
+ */ .ptraced = LIST_HEAD_INIT(init_task.ptraced), .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), .real_parent = &init_task, diff --git a/init/main.c b/init/main.c index af50044deed56..fd9863f53ec9e 100644 --- a/init/main.c +++ b/init/main.c @@ -736,6 +736,10 @@ noinline void __ref __noreturn rest_init(void) } /* Check for early params. */ +/* IAMROOT20_20240323 START + * ex) param = "console", val = "ttyS0,115200n8" + * unused = "early options", arg = null + */ static int __init do_early_param(char *param, char *val, const char *unused, void *arg) { @@ -770,6 +774,10 @@ void __init parse_early_param(void) return; /* All fall through to do_early_param. */ + /* IAMROOT20 20240316 + * boot_command_line에는 dt에서 가져온 bootargs의 문자열이 저장되어 있음 + * ex) boot_command_line = "console=ttyS0,115200n8 earlyprintk" + */ strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_early_options(tmp_cmdline); done = 1; @@ -877,6 +885,14 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } +/* IAMROOT20 20240106 + * asmlinkage : 어셈블리와 링크가 가능하다는 뜻 즉, 어셈블리어로 짜여진 코드에서 + * 이 함수를 호출 할 수 있다는 뜻입니다. + * __visible : 링커가 이 함수를 생략하지 못하도록 하는 것 같습니다. + * __no_sanitize_address : C/C++에서 메모리 버그를 감지하기 위한 감지기(detector)를 + * 사용하지 않겠다. + * __noreturn : 이 함수는 리턴하지 않는다. + */ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(void) { char *command_line; @@ -884,6 +900,7 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v set_task_stack_end_magic(&init_task); smp_setup_processor_id(); + /* IAMROOT20_END 20240106 */ /* IAMROOT20_START 20240113 */ debug_objects_early_init(); init_vmlinux_build_id(); @@ -897,7 +914,17 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v * enable them. */ boot_cpu_init(); + /* IAMROOT20_END 20240127 */ + /* IAMROOT20_START 20240203 */ + /* IAMROOT20 20240203 + * 32bit 시스템에서는 1:1 매핑이 일부만 가능하기 때문에 ZONE_NORMAL을 초과하는 메모리가 이 영역을 사용한다. 
+ * 64bit 시스템에서는 모든 물리 메모리가 1:1 매핑이 가능하므로 ZONE_HIGHMEM을 사용하지 않는다. + */ page_address_init(); + /* IAMROOT20 20240203 + * linux_banner 출력 + * ex) Linux version 6.6.10-1-rt19-MANJARO (builduser@fv-az1491-220) (gcc (GCC) 13.2.1 20230801, GNU ld (GNU Binutils) 2.41.0) #1 SMP PREEMPT_RT Wed Jan 10 09:41:23 UTC 2024 + */ pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4d42f0cbc11ea..bf6a67c0afb34 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -129,12 +129,26 @@ static struct workqueue_struct *cgroup_destroy_wq; #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, struct cgroup_subsys *cgroup_subsys[] = { #include <linux/cgroup_subsys.h> + /* IAMROOT20 20240113 + * SUBSYS(cpuset) --> [cpuset_cgrp_id] = &cpuset_cgrp_subsys, + * SUBSYS(cpu) --> [cpu_cgrp_id] = &cpu_cgrp_subsys, + * SUBSYS(cpuacct) --> [cpuacct_cgrp_id] = &cpuacct_cgrp_subsys, + * SUBSYS(io) --> [io_cgrp_id] = &io_cgrp_subsys, + * SUBSYS(memory) --> [memory_cgrp_id] = &memory_cgrp_subsys, + */ }; #undef SUBSYS /* array of cgroup subsystem names */ #define SUBSYS(_x) [_x ## _cgrp_id] = #_x, static const char *cgroup_subsys_name[] = { +/* IAMROOT20 20240120 + SUBSYS(cpuset) -> [cpuset_cgrp_id] = "cpuset", + SUBSYS(cpu) -> [cpu_cgrp_id] = "cpu", + SUBSYS(cpuacct) -> [cpuacct_cgrp_id] = "cpuacct", + SUBSYS(io) -> [io_cgrp_id] = "io", + SUBSYS(memory) -> [memory_cgrp_id] = "memory", +*/ #include <linux/cgroup_subsys.h> }; #undef SUBSYS @@ -201,6 +215,10 @@ static u64 css_serial_nr_next = 1; * These bitmasks identify subsystems with specific features to avoid * having to do iterative checks repeatedly. */ +/* IAMROOT20 20240120 + * 이러한 비트마스크는 반복적인 검사를 반복적으로 수행할 필요가 없도록 특정 + * 기능을 갖춘 하위 시스템을 식별합니다.
+ */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; static u16 have_release_callback __read_mostly; @@ -5501,6 +5519,10 @@ static int online_css(struct cgroup_subsys_state *css) lockdep_assert_held(&cgroup_mutex); + /* IAMROOT20 20240120 + * ss -> cpuset_cgrp_subsys 이면 + * css_online = cpuset_css_online + */ if (ss->css_online) ret = ss->css_online(css); if (!ret) { @@ -6031,6 +6053,11 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't * need to invoke fork callbacks here. */ + /* IAMROOT20 20240120 + * + * 시스템 부팅 시 모든 하위 시스템이 등록되기 전에 포크된 작업이 없으므 + * 로 여기서 포크 콜백을 호출할 필요가 없습니다. + */ BUG_ON(!list_empty(&init_task.tasks)); BUG_ON(online_css(css)); @@ -6052,10 +6079,16 @@ int __init cgroup_init_early(void) ctx.root = &cgrp_dfl_root; init_cgroup_root(&ctx); + /* IAMROOT20_END 20240113 */ /* IAMROOT20_START 20240120 */ cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); + /* IAMROOT20 20240120 + * + * for (i = 0; i < CGROUP_SUBSYS_COUNT && + * ((ss = cgroup_subsys[i]) || true); i++) + */ for_each_subsys(ss, i) { WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id, "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n", diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e4ca2dd2b7648..c838b7113b84e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3147,6 +3147,10 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) static int cpuset_css_online(struct cgroup_subsys_state *css) { + /* IAMROOT20 20240120 + * cs = &top_cpuset + * parent = NULL + */ struct cpuset *cs = css_cs(css); struct cpuset *parent = parent_cs(cs); struct cpuset *tmp_cs; diff --git a/kernel/cpu.c b/kernel/cpu.c index f4a2c5845bcbd..e4136064a80bd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2698,11 +2698,24 @@ void
set_cpu_online(unsigned int cpu, bool online) } } +/* IAMROOT20 20240120 + boot core의 online, active, present, possible bit를 set + __cpu_possible_mask: 디바이스 트리에서 파싱된 CPU 를 의미함 + __cpu_present_mask: 실제로 CPU 가 물리적으로 존재하는 것을 의미함 + __cpu_online_mask: 부팅 완료를 나타냄 + __cpu_active_mask: 스케줄러가 바라보는 core 상태를 나타냄 + https://yohda.tistory.com/entry/%EC%BB%A4%EB%84%90%ED%8C%8C%EC%9B%8C-Linux-CPU-core-%EC%A0%84%EC%9B%90-%EA%B4%80%EB%A6%AC5-CPU-control-hotplug%EC%9E%91%EC%84%B1%EC%A4%91 + */ +/* IAMROOT20_END 20240120 */ +/* IAMROOT20_START 20240127 */ /* * Activate the first processor. */ void __init boot_cpu_init(void) { + /* IAMROOT20 20240127 + * 현재 실행중인 코어의 논리번호를 가져옴 + */ int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index c21abc77c53e9..255aabfa00802 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -358,6 +358,18 @@ static const struct reserved_mem_ops rmem_dma_ops = { .device_release = rmem_dma_device_release, }; +/* IAMROOT20 20240413 + * ex) ipa_fws_region: ipa@f6800000 { + * compatible = "shared-dma-pool"; + * reg = <0x0 0xf6800000 0x0 0x5000>; + * no-map; + * }; + * zap_shader_region: gpu@f6900000 { + * compatible = "shared-dma-pool"; + * reg = <0x0 0xf6900000 0x0 0x2000>; + * no-map; + * }; + */ static int __init rmem_dma_setup(struct reserved_mem *rmem) { unsigned long node = rmem->fdt_node; diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 6ea80ae426228..8e4fd0f220338 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -128,6 +128,12 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif #ifdef CONFIG_DMA_PERNUMA_CMA +/* IAMROOT20 20240720 + * kernel parameter 설정 : cma_pernuma=nn[MG] + * - enable 되어 있으면 + * DMA user가 버퍼를 할당할 때, pernuma area에서 메모리를 먼저 찾고, + * 실패한 경우 global default memory 에서 메모리를 할당함 + */ void __init dma_pernuma_cma_reserve(void) { int nid; @@ -141,6 +147,9 @@ void 
__init dma_pernuma_cma_reserve(void) struct cma **cma = &dma_contiguous_pernuma_area[nid]; snprintf(name, sizeof(name), "pernuma%d", nid); + /* IAMROOT20 20240720 + * pernuma_size_bytes 만큼 memblock에서 메모리 할당(reserved로 표시) + */ ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, 0, false, name, cma, nid); if (ret) { diff --git a/kernel/fork.c b/kernel/fork.c index 41c964104b584..8184340aeb912 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1090,6 +1090,9 @@ int __weak arch_dup_task_struct(struct task_struct *dst, return 0; } +/* IAMROOT20 20240106 + * 스택의 맨 끝에 magic 코드를 써 넣는다. + */ void set_task_stack_end_magic(struct task_struct *tsk) { unsigned long *stackend; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index d9c822bbffb8d..00115a50070c7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -67,6 +67,11 @@ static void jump_label_swap(void *a, void *b, int size) struct jump_entry *jeb = b; struct jump_entry tmp = *jea; + /* IAMROOT20 20240316 + * jump_entry_code/target/key() 함수에서 + * '주소 + 값'으로 값을 비교 + * -> '주소'가 변한 만큼(delta) '값'에서 보상을 해줘야 한다 + */ jea->code = jeb->code - delta; jea->target = jeb->target - delta; jea->key = jeb->key - delta; @@ -507,17 +512,33 @@ void __init jump_label_init(void) jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); + /* IAMROOT20 20240316 + * 모든 jump entry 순회 + */ for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; bool in_init; /* rewrite NOPs */ + /* IAMROOT20 20240316 + * nop 엔트리일 경우 nop 명령어로 교환 + * arm64의 경우 컴파일 타임에 nop으로 생성되어 있으므로 생략됨 + */ if (jump_label_type(iter) == JUMP_LABEL_NOP) arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + /* IAMROOT20 20240316 + * entry의 code가 init 섹션에 있는지 확인 + * init 섹션에 있을 경우 나중에 init 섹션 내용은 삭제되므로 + * init 섹션안에 포함된 jump entry들은 업데이트 할 필요가 없음 + */ in_init = init_section_contains((void *)jump_entry_code(iter), 1); jump_entry_set_init(iter, in_init); + /* IAMROOT20 20240316 + * key의 type의 하위 2비트에 nop 엔트리인지 jump 엔트리인지 기록하고 + 
* key의 entries와 sorting되어 있는 첫번째 jump 엔트리만 연결 + */ iterk = jump_entry_key(iter); if (iterk == key) continue; diff --git a/kernel/params.c b/kernel/params.c index 6a7548979aa9a..b640cc3e611e4 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -151,6 +151,10 @@ static int parse_one(char *param, if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); + /* IAMROOT20 20240316 + * ex) do_early_param("console", "ttyS0,115200n8", "early options", NULL); + */ + /* IAMROOT20_END 20240316 */ return handle_unknown(param, val, doing, arg); } @@ -172,6 +176,9 @@ char *parse_args(const char *doing, char *param, *val, *err = NULL; /* Chew leading spaces */ + /* IAMROOT20 20240316 + * args 앞에 존재하는 space를 제거 + */ args = skip_spaces(args); if (*args) @@ -181,6 +188,10 @@ char *parse_args(const char *doing, int ret; int irq_was_disabled; + /* IAMROOT20 20240316 + * ex) args = "console=ttyS0,115200n8 earlyprintk" + * param = console, val = ttyS0,115200n8 + */ args = next_arg(args, &param, &val); /* Stop at -- */ if (!val && strcmp(param, "--") == 0) diff --git a/lib/buildid.c b/lib/buildid.c index e3a7acdeef0ed..5be99b6c8beee 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -20,6 +20,25 @@ static int parse_build_id_buf(unsigned char *build_id, { Elf32_Word note_offs = 0, new_offs; + /* IAMROOT20 20240113 + * sizeof(Elf32_Nhdr) = 12 + * + * exam) + * +-------------------------+ + * |<--------Elf32_Nhdr----->| + * |namesz |descsz | type | + * +-------------------------+ + * 04000000 14000000 03000000 474e5500 ............GNU. + * |<---------------------- build id - --------------- + * b752b23a ddbcb03d 42316e26 804bbcba .R.:...=B1n&.K.. + * |------>| + * 415c0cb6 06000000 04000000 01010000 A\.............. + * 4c696e75 78000000 00000000 06000000 Linux........... + * 01000000 00010000 4c696e75 78000000 ........Linux...
+ * 00000000 + * + * build_id = b752b23a ddbcb03d 42316e26 804bbcba 415c0cb6 + */ while (note_offs + sizeof(Elf32_Nhdr) < note_size) { Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); @@ -28,10 +47,16 @@ static int parse_build_id_buf(unsigned char *build_id, !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 && nhdr->n_descsz <= BUILD_ID_SIZE_MAX) { + /* IAMROOT20 20240113 + * Elf32_Nhdr과 name 다음에 있는 build id를 build_id에 복사한다. + */ memcpy(build_id, note_start + note_offs + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); + /* IAMROOT20 20240113 + * build_id 남은 부분을 0으로 셋팅한다. + */ memset(build_id + nhdr->n_descsz, 0, BUILD_ID_SIZE_MAX - nhdr->n_descsz); if (size) @@ -182,6 +207,12 @@ unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init; */ void __init init_vmlinux_build_id(void) { + /* IAMROOT20 20240113 + * __weak : https://kldp.org/node/40383 + * 같은 이름의 심볼이 있으면 weak symbol이 strong symbol에게 overriding이 되게 하는 것입니다. + * 예를 들어 shared library에 a라는 weak symbol이 있을 때 이를 사용하는 프로그램에서 a라는 + * 심볼이 있으면 프로그램에서는 shared library가 아닌 자신에 있는 것을 사용하게 됩니다. + */ extern const void __start_notes __weak; extern const void __stop_notes __weak; unsigned int size = &__stop_notes - &__start_notes; diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 984985c39c9b0..8ef22e6b6f759 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -56,6 +56,9 @@ static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; +/* IAMROOT20 20240113 + * __initdata : 커널 초기화 이후 해제되는 영역에 코드를 배치함으로써 메모리 영역 확보 +*/ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); @@ -1323,6 +1326,10 @@ static inline void debug_objects_selftest(void) { } * the static object pool objects into the poll list. After this call * the object tracker is fully operational. 
*/ +/* IAMROOT20 20240113 + * ODEBUG_HASH_SIZE : 16K + * ODEBUG_POOL_SIZE : 1024 +*/ void __init debug_objects_early_init(void) { int i; diff --git a/lib/string.c b/lib/string.c index 3d55ef8901068..277d07a0e214a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -324,6 +324,12 @@ EXPORT_SYMBOL(strncmp); * Note that the %NUL-terminator is considered part of the string, and can * be searched for. */ +/* IAMROOT20 20240525 + * ex1) s = "foo/bar", c = '/' + * ^---- s : return + * ex2) s = "foo/bar", c = ':' + * - return NULL + */ char *strchr(const char *s, int c) { for (; *s != (char)c; ++s) @@ -343,6 +349,12 @@ EXPORT_SYMBOL(strchr); * Returns pointer to first occurrence of 'c' in s. If c is not found, then * return a pointer to the null byte at the end of s. */ +/* IAMROOT20 20240525 + * ex) s = "foo/bar", c = '/' + * ^----s : return + * ex) s = "foo/bar", c = ':' + * ^----s : return + */ char *strchrnul(const char *s, int c) { while (*s && *s != (char)c) @@ -459,6 +471,11 @@ EXPORT_SYMBOL(strspn); * @s: The string to be searched * @reject: The string to avoid */ +/* IAMROOT20 20240525 + * ex) s = "foo/bar", reject = "/:" + * ^----p + * return : p-s = 3 + */ size_t strcspn(const char *s, const char *reject) { const char *p; diff --git a/mm/cma.c b/mm/cma.c index 6268d6620254f..e7ff8ef4cead5 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -171,6 +171,10 @@ void __init cma_reserve_pages_on_error(struct cma *cma) * * This function creates custom contiguous area from already reserved memory. */ +/* IAMROOT20 20240607 + * - size, align, reserved 영역 범위 검사등을 수행하고 cma 정보를 하나 얻고 + * 초기화한다. + */ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, @@ -235,6 +239,15 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. 
+ */ +/* IAMROOT20 20240607 + * - cmdline or kernel config에 의한 cma 정보를 memblock reserved 영역을 할당하고 + * @res_cma(struct cma)에 등록한다. + * + * res = cma_declare_contiguous_nid(0, size, 0, + * PAGE_SIZE << HUGETLB_PAGE_ORDER, // SIZE_2M + * 0, false, name, // name="hugetlb[0-16]" + * &hugetlb_cma[nid], nid); */ int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, @@ -312,7 +325,15 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, goto err; } + /* IAMROOT20_END 20240713 */ /* IAMROOT20_START 20240720 */ /* Reserve memory */ + /* IAMROOT20 20240720 + * 1) fixed == true + * - base가 지정되어 있는 경우 + * 2) fixed == false + * - base가 지정되어 있지 않은 경우, + * memblock에서 할당가능한 영역(size)을 찾아서 reserved로 설정한다 + */ if (fixed) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { @@ -330,6 +351,11 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, * Avoid using first 4GB to not interfere with constrained zones * like DMA/DMA32. */ + /* IAMROOT20 20240720 + * 메모리가 충분한 경우, cma를 bottom-up으로 할당 + * - 메모리 compaction하는 경우, cma 할당 fail을 막을 수 있음: + * - DMA32 영역의 간섭을 피하기 위해 start는 4G 이후로 설정 + */ #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) { memblock_set_bottom_up(true); @@ -345,12 +371,20 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, * try allocating from high memory first and fall back to low * memory in case of failure. 
*/ + /* IAMROOT20 20240720 + * arm64의 경우 highmem을 사용하지 않기 때문에 아래 if문을 수행하지 않음 + * - highmem_start = memblock_end + */ if (!addr && base < highmem_start && limit > highmem_start) { addr = memblock_alloc_range_nid(size, alignment, highmem_start, limit, nid, true); limit = highmem_start; } + /* IAMROOT20 20240720 + * 위의 두 if문에서 할당하지 못한 경우, 실제로 여기서 memblock 할당 + * - base ~ limit에서 size 만큼의 메모리 영역을 할당 + */ if (!addr) { addr = memblock_alloc_range_nid(size, alignment, base, limit, nid, true); diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index 9bc12e526ed0b..2d702a3caa07d 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -108,7 +108,10 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) int i, slot; WARN_ON(system_state >= SYSTEM_RUNNING); - + + /* IAMROOT_20240224_START + * slot - 사용하지 않은 slot을 찾는다 + */ slot = -1; for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { if (!prev_map[i]) { @@ -130,6 +133,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) /* * Mappings have to be page-aligned */ + /* IAMROOT_20240224 + * mapping 할 영역을 page-aligned을 맞춘다. 
+ * - phys_addr : 시작주소 -> page aligned-down + * - size : page aligned된 크기 (>= 1 page size) + */ offset = offset_in_page(phys_addr); phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr + 1) - phys_addr; @@ -146,6 +154,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) */ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { + /* IAMROOT_20240224 + * 정규 페이징 이후에는 early_ioremap() api를 사용하지 않음 + * - x86, arm64는 정규 페이징 이후에도 early_ioremap() api를 + * 사용할 수 있음 + */ if (after_paging_init) __late_set_fixmap(idx, phys_addr, prot); else diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f791076da157c..228173124debf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7489,6 +7489,18 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; +/* IAMROOT20 20240607 + * cmdline에 hugetlb_cma=0:1G,1:2G,2:1G,3:1G 일때 + * + * hugetlb_cma_size_in_node[0] = 1*SIZE_1G + * hugetlb_cma_size_in_node[1] = 2*SIZE_1G + * hugetlb_cma_size_in_node[2] = 1*SIZE_1G + * hugetlb_cma_size_in_node[3] = 1*SIZE_1G + * + * cmdline에 hugetlb_cma=7G 일때 + * + * hugetlb_cma_size = 7*SIZE_1G + */ static int __init cmdline_parse_hugetlb_cma(char *p) { int nid, count = 0; @@ -7496,6 +7508,10 @@ static int __init cmdline_parse_hugetlb_cma(char *p) char *s = p; while (*s) { + /* IAMROOT20 20240607 + * %n은 읽은 문자 개수를 리턴하는 기능이다. + * https://woogyun.tistory.com/301 + */ if (sscanf(s, "%lu%n", &tmp, &count) != 1) break; @@ -7550,6 +7566,9 @@ void __init hugetlb_cma_reserve(int order) continue; } + /* IAMROOT20 20240607 + * order가 18이면 CMA 최소사이즈는 SIZE_1G + */ if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) { pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n", nid, (PAGE_SIZE << order) / SZ_1M); @@ -7571,6 +7590,10 @@ void __init hugetlb_cma_reserve(int order) return; } + /* IAMROOT20 20240607 + * node별 CMA 할당을 할 필요가 없을때.
+ */ + /* IAMROOT20_END 20240706 */ /* IAMROOT20_START 20240713 */ if (!node_specific_cma_alloc) { /* * If 3 GB area is requested on a machine with 4 numa nodes, @@ -7603,6 +7626,10 @@ void __init hugetlb_cma_reserve(int order) * may be returned to CMA allocator in the case of * huge page demotion. */ + /* IAMROOT20 20240720 + * - size 만큼 memblock에서 continguous memory 할당 + * - hugetlb_cma[nid]에 할당한 메모리 정보(struct cma)에 대한 포인터 저장 + */ res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << HUGETLB_PAGE_ORDER, 0, false, name, diff --git a/mm/init-mm.c b/mm/init-mm.c index efa97b57acfd8..93a5d1c93cb03 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -27,6 +27,9 @@ * Since there is only one init_mm in the entire system, keep it simple * and size this cpu_bitmask to NR_CPUS. */ +/* IAMROOT20 20231125 + * arm64에서는 .pgd에 swapper_pg_dir대신 init_pg_dir로 설정한다. + */ struct mm_struct init_mm = { .mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock), .pgd = swapper_pg_dir, diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a2d34226e3c8c..ed4858230be44 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -408,7 +408,11 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, while (rb) { struct kmemleak_object *object; unsigned long untagged_objp; - + + /* IAMROOT20 20240622 + * rb의 주소에 kmemleak_object구조체에서 rb_node위치의 + * offest을 빼서 object의 주소를 구함. + */ object = rb_entry(rb, struct kmemleak_object, rb_node); untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); @@ -794,6 +798,10 @@ static void delete_object_part(unsigned long ptr, size_t size, bool is_phys) * split. Note that partial freeing is only done by free_bootmem() and * this happens before kmemleak_init() is called. 
*/ + /* IAMROOT20 20240622 + * object보다 제거할 메모리 영역이 작을 경우, + * 앞과 뒤의 object를 각각 생성하고 메모리 영역 삭제 + */ start = object->pointer; end = object->pointer + object->size; if (ptr > start) diff --git a/mm/memblock.c b/mm/memblock.c index 3feafea06ab21..c4ace8c8410ea 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,7 +26,7 @@ #define INIT_PHYSMEM_REGIONS 4 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS -# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #endif #ifndef INIT_MEMBLOCK_MEMORY_REGIONS @@ -106,6 +106,10 @@ unsigned long min_low_pfn; unsigned long max_pfn; unsigned long long max_possible_pfn; +/* IAMROOT20 20240224 + * INIT_MEMBLOCK_MEMORY_REGIONS = 128 * 8 + * INIT_MEMBLOCK_RESERVED_REGIONS = 128 + 256 + 1 + */ static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -143,7 +147,6 @@ struct memblock_type physmem = { * pointer will be reset to NULL at memblock_discard() */ static __refdata struct memblock_type *memblock_memory = &memblock.memory; - #define for_each_memblock_type(i, memblock_type, rgn) \ for (i = 0, rgn = &memblock_type->regions[0]; \ i < memblock_type->cnt; \ @@ -167,6 +170,13 @@ static enum memblock_flags __init_memblock choose_memblock_flags(void) } /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +/* IAMROOT20 20240309 + * ex) PHYS_ADDR_MAX = 0xffff_ffff_ffff_ffff + base = 0xffff_ffff_ffff_fff0 + size = 0x100 + return size = min(0x100, 0xf) = 0xf; + (base + size)가 overflow가 발생하면 (PHYS_ADDR_MAX - base)를 size로 재설정 + */ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) { return *size = min(*size, PHYS_ADDR_MAX - base); @@ -217,12 +227,24 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, { 
phys_addr_t this_start, this_end, cand; u64 i; - + + /* IAMROOT20 20240309 + * 빈 memblock 공간을 루프를 돌며 하나씩 알아옴 + */ for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { + /* IAMROOT20 20240309 + * start ~ end 범위를 벗어나면 조정 + */ this_start = clamp(this_start, start, end); this_end = clamp(this_end, start, end); + /* IAMROOT20 20240309 + * 요청 사이즈의 비교를 align된 크기로 하기 위해 올림 + */ cand = round_up(this_start, align); + /* IAMROOT20 20240309 + * 알아온 free 영역의 범위에 size가 포함될 수 있으면 cand 주소를 리턴 + */ if (cand < this_end && this_end - cand >= size) return cand; } @@ -258,10 +280,16 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, this_start = clamp(this_start, start, end); this_end = clamp(this_end, start, end); + /* IAMROOT20 20240406 + * 만약 할당된 크기가 size보다 작으면 continue + */ if (this_end < size) continue; cand = round_down(this_end - size, align); + /* IAMROOT20 20240406 + * 영역을 할당받았지만, round_down된 주소가 this_start보다 작으면 continue + */ if (cand >= this_start) return cand; } @@ -330,6 +358,9 @@ static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, ret = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); + /* IAMROOT20 20240309 + * 미러 플래그가 요청된 상태에서 공간을 찾지 못한 경우 미러 플래그를 제거하고 다시 공간을 찾음 + */ if (!ret && (flags & MEMBLOCK_MIRROR)) { pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", &size); @@ -446,9 +477,15 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, if (type != &memblock.reserved) new_area_start = new_area_size = 0; + /* IAMROOT20 20240309 + * 새로 관리 영역을 할당받을 공간은 추가 요청 영역을 피해야 하므로 요청 영역의 상부를 먼저 검색 + */ addr = memblock_find_in_range(new_area_start + new_area_size, memblock.current_limit, new_alloc_size, PAGE_SIZE); + /* IAMROOT20 20240309 + * 첫 번째 검색에서 할당받지 못 하고 요청 타입이 reserved인 경우에 추가 요청 영역을 피해 하부를 검색 + */ if (!addr && new_area_size) addr = memblock_find_in_range(0, min(new_area_start, memblock.current_limit), @@ -504,6 +541,9 @@ static int 
__init_memblock memblock_double_array(struct memblock_type *type, * @end_rgn: end scanning at (@end_rgn - 1) * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn) */ +/* IAMROOT20 20240309 + * (start_rgn - 1) index부터 end_rgn index미만까지 merge + */ static void __init_memblock memblock_merge_regions(struct memblock_type *type, unsigned long start_rgn, unsigned long end_rgn) @@ -520,13 +560,22 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type, memblock_get_region_node(this) != memblock_get_region_node(next) || this->flags != next->flags) { + /* IAMROOT20 20240309 + * this region과 next region이 인접하지 않거나 같은 node가 아니면 merge할 수 없으므로 다음 region을 확인하러 넘어감 + */ BUG_ON(this->base + this->size > next->base); i++; continue; } + /* IAMROOT20 20240309 + * this region과 next region을 merge + */ this->size += next->size; /* move forward from next + 1, index of which is i + 2 */ + /* IAMROOT20 20240309 + * (next + 1)부터 (type->cnt - (i + 2))개를 앞으로 한칸씩 이동시킴 + */ memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); type->cnt--; end_rgn--; @@ -545,6 +594,9 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type, * Insert new memblock region [@base, @base + @size) into @type at @idx. * @type must already have extra room to accommodate the new region. 
*/ +/* IAMROOT20 20240309 + * (base ~ size) 영역을 memblock->type->region[idx] 위치에 insert + */ static void __init_memblock memblock_insert_region(struct memblock_type *type, int idx, phys_addr_t base, phys_addr_t size, @@ -554,7 +606,13 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, struct memblock_region *rgn = &type->regions[idx]; BUG_ON(type->cnt >= type->max); + /* IAMROOT20 20240309 + * memblock->type->region[idx]부터 뒤로 한 칸씩 이동시킴 + */ memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); + /* IAMROOT20 20240309 + * memblock->type->region[idx] 위치에 new region을 insert + */ rgn->base = base; rgn->size = size; rgn->flags = flags; @@ -579,6 +637,10 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, * Return: * 0 on success, -errno on failure. */ +/* IAMROOT20_START 20240309 */ +/* IAMROOT20 20240309 + * 참고: http://jake.dothome.co.kr/memblock-1/ + */ static int __init_memblock memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int nid, enum memblock_flags flags) @@ -593,6 +655,10 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, return 0; /* special case for empty array */ + /* IAMROOT20 20240309 + * type에 등록된 regions이 없을 경우 + * new region을 등록 후 함수 종료 + */ if (type->regions[0].size == 0) { WARN_ON(type->cnt != 1 || type->total_size); type->regions[0].base = base; @@ -610,9 +676,17 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * that there is enough empty regions in @type, and we can insert * regions directly. */ + /* IAMROOT20 20240309 + * 현재 최대로 넣을 수 있는 region 갯수는 type->max인데 + * 최악의 경우인 (type->cnt * 2 + 1)보다 type->max가 작거나 같으면 insert를 true로 설정하여 repeat을 방지함 + */ if (type->cnt * 2 + 1 <= type->max) insert = true; +/* IAMROOT20 20240309 + * insert가 false이면 먼저 정확한 new_region이 필요한 영역을 확인하고 insert를 true로 변경 + * insert가 true이면 repeat을 다시 실행하지 않고 실제 new region을 insert + */ repeat: /* * The following is executed twice. 
Once with %false @insert and @@ -622,18 +696,30 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, base = obase; nr_new = 0; + /* IAMROOT20 20240309 + * 기존에 존재하는 region을 순회 + */ for_each_memblock_type(idx, type, rgn) { phys_addr_t rbase = rgn->base; phys_addr_t rend = rbase + rgn->size; + /* IAMROOT20 20240309 + * (rbase >= end)일 경우 더 이상 new region이 기존 region과 겹치지 않으므로 반복문 종료 + */ if (rbase >= end) break; + /* IAMROOT20 20240309 + * (rend <= base)일 경우 new region의 현재 비교하고 있는 region과 겹치지 않으므로 다음 region으로 진행 + */ if (rend <= base) continue; /* * @rgn overlaps. If it separates the lower part of new * area, insert that portion. */ + /* IAMROOT20 20240309 + * (rbase > base)일 경우 new region의 하위 부분과 현재 비교하고 있는 region이 겹치므로 new region을 하위 부분인 (base ~ rbase) 영역를 insert + */ if (rbase > base) { #ifdef CONFIG_NUMA WARN_ON(nid != memblock_get_region_node(rgn)); @@ -650,10 +736,16 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, } } /* area below @rend is dealt with, forget about it */ + /* IAMROOT20 20240309 + * base를 새롭게 설정 + */ base = min(rend, end); } /* insert the remaining portion */ + /* IAMROOT20 20240309 + * 나머지 (base ~ end) 영역을 insert + */ if (base < end) { nr_new++; if (insert) { @@ -665,6 +757,9 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, } } + /* IAMROOT20 20240309 + * insert 해야하는 new region이 없으면 함수 종료 + */ if (!nr_new) return 0; @@ -673,12 +768,18 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * insertions; otherwise, merge and return. 
*/ if (!insert) { + /* IAMROOT20 20240309 + * insert 해야될 new region이 max region size보다 크면 memblock_double_array를 호출하여 memblock.type의 사이즈를 2배 증가시킴 + */ while (type->cnt + nr_new > type->max) if (memblock_double_array(type, obase, size) < 0) return -ENOMEM; insert = true; goto repeat; } else { + /* IAMROOT20 20240309 + * 모든 insert 작업이 끝나고 인접한 region들끼리 merge를 수행 + */ memblock_merge_regions(type, start_rgn, end_rgn); return 0; } @@ -719,6 +820,9 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, * Return: * 0 on success, -errno on failure. */ +/* IAMROOT20_20240302 + * memblock.memory에 base부터 end까지 region을 등록 + */ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; @@ -745,6 +849,9 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) * Return: * 0 on success, -errno on failure. */ +/* IAMROOT20 20240309 + * 제거할 영역만 분리하기 region을 나눔 + */ static int __init_memblock memblock_isolate_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int *start_rgn, int *end_rgn) @@ -759,6 +866,10 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, return 0; /* we'll create at most two more regions */ + /* IAMROOT20 20240309 + * 기존 region수에 위아래 최대로 추가될 수 있는 2개의 region수를 더한 값이 max region size보다 크면 + * memblock_double_array를 호출하여 memblock.type의 사이즈를 2배씩 증가시킴 + */ while (type->cnt + 2 > type->max) if (memblock_double_array(type, base, size) < 0) return -ENOMEM; @@ -767,11 +878,21 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, phys_addr_t rbase = rgn->base; phys_addr_t rend = rbase + rgn->size; + /* IAMROOT20 20240309 + * (rbase >= end)일 경우 분리할 영역이 기존 region과 겹치지 않으므로 반복문 종료 + */ if (rbase >= end) break; + /* IAMROOT20 20240309 + * (rend <= base)일 경우 분리할 영역이 현재 비교하고 있는 region과 겹치지 않으므로 다음 region으로 진행 + */ if (rend <= base) continue; + /* IAMROOT20 20240309 + * (rbase < base)일 경우 분리할 영역의 하위 부분과 현재 비교하고 있는 region의 상위 
부분이 겹치므로 + * 비교하는 region의 겹치지 않는 하위 부분인 (rbase ~ base)영역을 분리해 new region으로 insert + */ if (rbase < base) { /* * @rgn intersects from below. Split and continue @@ -783,6 +904,11 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, memblock_insert_region(type, idx, rbase, base - rbase, memblock_get_region_node(rgn), rgn->flags); + /* IAMROOT20 20240309 + * (rend > end)일 경우 분리할 영역의 상위 부분과 현재 비교하고 있는 region의 하위 부분이 겹치므로 + * 비교하는 region의 겹치는 하위 부분인 (rbase ~ end)영역을 분리해 new region으로 insert + * region추가 후 idx를 하나 줄여 다음 루프에서 제거할 region을 가리키도록 함 + */ } else if (rend > end) { /* * @rgn intersects from above. Split and redo the @@ -815,6 +941,9 @@ static int __init_memblock memblock_remove_range(struct memblock_type *type, if (ret) return ret; + /* IAMROOT20 20240309 + * 분리된 region을 제거 + */ for (i = end_rgn - 1; i >= start_rgn; i--) memblock_remove_region(type, i); return 0; @@ -829,6 +958,7 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.memory, base, size); } +/* IAMROOT20_END 20240309 */ /** * memblock_free - free boot memory allocation @@ -863,6 +993,10 @@ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.reserved, base, size); } +/* IAMROOT20_20240302 + * memblock.reserved에 base부터 end까지 region을 등록 + */ +/* IAMROOT20_END 20240302 */ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; @@ -1002,6 +1136,9 @@ static bool should_skip_region(struct memblock_type *type, if (type != memblock_memory) return false; + /* IAMROOT20 20240406 + * NUMA_NO_NODE이거나, 현재 region의 노드의 nid가, 찾고자 하는 노드의 nid와 다른 경우 skip + */ /* only memory regions are associated with nodes, check it */ if (nid != NUMA_NO_NODE && nid != m_nid) return true; @@ -1057,6 +1194,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, 
phys_addr_t *out_end, int *out_nid) { + /* IAMROOT20 20240309 + * idx 값을 절반으로 나누어 lsb: idx_a의 카운터,, msb: idx_b의 카운터 + */ int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; @@ -1074,6 +1214,10 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, if (should_skip_region(type_a, m, nid, flags)) continue; + /* IAMROOT20 20240309 + * type_b에 대한 영역이 지정되지 않으면(null) 현재 1차 루프 + * 인덱스의 memblock에 대한 영역을 반환 + */ if (!type_b) { if (out_start) *out_start = m_start; @@ -1093,6 +1237,10 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, phys_addr_t r_end; r = &type_b->regions[idx_b]; + /* IAMROOT20 20240309 + * idx_b가 0보다 크면 현재 이전 memblock의 끝 주소를 + * 가리키고 idx_b가 0이면 0번 주소를 지정 + */ r_start = idx_b ? r[-1].base + r[-1].size : 0; r_end = idx_b < type_b->cnt ? r->base : PHYS_ADDR_MAX; @@ -1101,9 +1249,18 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * if idx_b advanced past idx_a, * break out to advance idx_a */ + /* IAMROOT20 20240309 + * reserve memblock 영역이 memory memblock 영역을 벗어난 + * 경우 2차 루프를 빠져나가서 다음 memory memblock을 준비 + */ if (r_start >= m_end) break; /* if the two regions intersect, we're done */ + /* IAMROOT20 20240309 + * 두 영역이 교차하는 경우 + * out_start에 하단 reserve 영역값의 끝 주소나 memory 영역값의 시작 주소중 가장 큰 주소 + * out_end에 상단 reserve 영역값의 시작 주소나 memory 영역값의 끝 주소중에 가장 작은 주소 + */ if (m_start < r_end) { if (out_start) *out_start = @@ -1116,6 +1273,11 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * The region which ends first is * advanced for the next iteration. */ + /* IAMROOT20 20240309 + * reserve 영역의 끝 주소가 memory 영역의 끝주소와 + * 비교하여 큰 경우 idx_a를 증가, 다음 memory 영역을 준비 + * 크지 않은 경우 idx_b를 증가, 다음 reserve 영역을 준비 + */ if (m_end <= r_end) idx_a++; else @@ -1160,6 +1322,9 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) nid = NUMA_NO_NODE; + /* IAMROOT20 20240406 + * 만약 idx가 가장 끝 주소인 경우, idx_a를 마지막 리전이 되고, idx_b를 마지막 리전의 + 1이 된다 + */ if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1178,6 +1343,9 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, if (should_skip_region(type_a, m, nid, flags)) continue; + /* IAMROOT20 20240406 + * 만약 type_b가 지정되지 않으면, 첫 번째 loop의 memblock 영역을 반환 + */ if (!type_b) { if (out_start) *out_start = m_start; @@ -1196,6 +1364,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, phys_addr_t r_start; phys_addr_t r_end; + /* IAMROOT20 20240406 + * idx_b의 리전을 바탕으로, 해당 리전의 이전 리전의 끝 주소를 r_start로 설정 + * 현재 리전의 시작 주소를 r_end로 설정 + * (만약 idx_b가 마지막 리전 이후를 가리키면 PHYS_ADDR_MAX로 설정) + */ r = &type_b->regions[idx_b]; r_start = idx_b ? r[-1].base + r[-1].size : 0; r_end = idx_b < type_b->cnt ? @@ -1204,7 +1377,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, * if idx_b advanced past idx_a, * break out to advance idx_a */ - if (r_end <= m_start) break; /* if the two regions intersect, we're done */ @@ -1219,6 +1391,13 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, idx_a--; else idx_b--; + /* IAMROOT20 20240406 + * idx_a = 0, idx_b = 1 + * idx = 0000_0001_0000_0000 + * + * idx_a = -1, idx_b = 0 + * idx = 0000_0000_ffff_ffff + */ *idx = (u32)idx_a | (u64)idx_b << 32; return; } @@ -1280,7 +1459,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, #ifdef CONFIG_NUMA int start_rgn, end_rgn; int i, ret; - + /* IAMROOT20 20240706 + * base ~ base + size 영역에 해당하는 memblock type의 + * 시작 region 인덱스와 끝 region 인덱스를 가져온다. 
+ */ ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; @@ -1404,7 +1586,11 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, flags); if (found && !memblock_reserve(found, size)) goto done; - + + /* IAMROOT20 20240413 + * nid가 지정되어있고, exact_nid가 false 인 경우 + * nid를 NUMA_NO_NODE로 변경하여 모든 nid에 대해 memory를 찾는다 + */ if (nid != NUMA_NO_NODE && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, @@ -1413,6 +1599,10 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, goto done; } + /* IAMROOT20 20240413 + * MEMBLOCK_MIRROR region에서 먼저 memory 할당을 시도했지만 실패하는 경우 + * mirror가 아닌 region에서 다시 할당을 시도한다 + */ if (flags & MEMBLOCK_MIRROR) { flags &= ~MEMBLOCK_MIRROR; pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", diff --git a/mm/memory.c b/mm/memory.c index 5ce82a76201d5..6143da0ec8934 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -126,6 +126,10 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL * and ZONE_HIGHMEM. */ +/* IAMROOT20 20240607 + * high_memory = __va(memblock_end_of_DRAM() - 1) + 1; + * arm64_memblock_init에서 설정 + */ void *high_memory; EXPORT_SYMBOL(high_memory); diff --git a/mm/mm_init.c b/mm/mm_init.c index 7f7f9c6778546..ef82a19f49056 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -215,6 +215,10 @@ static int __init mm_sysfs_init(void) } postcore_initcall(mm_sysfs_init); +/* IAMROOT20 20240810 + * free_area_init()에서 초기화함 + * - [ZONE_MOVABLE]은 설정하지 않음 + */ static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata; @@ -335,6 +339,10 @@ static void __init find_usable_zone_for_movable(void) * memory. 
When they don't, some nodes will have more kernelcore than * others */ +/* IAMROOT20 20240824 + * zone_movable_pfn[]에 movable zone의 start pfn 설정 + * - 커널 파라미터 movable_node, kernelcore, movablecore 참조 + */ static void __init find_zone_movable_pfns_for_nodes(void) { int i, nid; @@ -446,6 +454,11 @@ static void __init find_zone_movable_pfns_for_nodes(void) restart: /* Spread kernelcore memory as evenly as possible throughout nodes */ kernelcore_node = required_kernelcore / usable_nodes; + + /* IAMROOT20_20240810 + * 노드 안의 range를 돌면서, required_kernelcore을 갱신하고 + * zone_movable_pfn의 시작점을 설정한다. + */ for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; @@ -472,17 +485,35 @@ static void __init find_zone_movable_pfns_for_nodes(void) if (start_pfn >= end_pfn) continue; + /* IAMROOT20_20240810 + * ZONE_NORMAL의 시작 pfn보다 아래에 있는 range의 경우, + * 해당 영역을 제외하여 kernelcore_remaining과 + * required_kernelcore을 다시 계산한다. + * 그리고 start_pfn을 ZONE_NORMAL의 시작점으로 갱신한다. + */ /* Account for what is only usable for kernelcore */ if (start_pfn < usable_startpfn) { unsigned long kernel_pages; kernel_pages = min(end_pfn, usable_startpfn) - start_pfn; + /* IAMROOT20_20240810 + * required_kernelcore : 전체 노드에 필요한 kernelcore의 양 + * kernelcore_remaining : 현재 노드에 배정된 kernelcore의 양 + */ kernelcore_remaining -= min(kernel_pages, kernelcore_remaining); required_kernelcore -= min(kernel_pages, required_kernelcore); + /* IAMROOT20_20240810 + * 만약 현재 range가 ZONE_NORMAL 아래에 완전히 포함되는 경우, + * - zone_movable_pfn을 갱신하고 다음 range로 continue + * + * 만약 현재 range가 ZONE_NORMAL과 겹치는 경우, + * - 아래 조건문을 타지 않고, start_pfn을 + * ZONE_NORMAL의 시작점으로 갱신한다 + */ /* Continue if range is now fully accounted */ if (end_pfn <= usable_startpfn) { @@ -498,6 +529,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) start_pfn = usable_startpfn; } + /* * The usable PFN range for ZONE_MOVABLE is from * start_pfn->end_pfn. 
Calculate size_pages as the @@ -521,6 +553,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) } } + /* IAMROOT20_END 20240810 *//* IAMROOT20_START 20240824 */ /* * If there is still required_kernelcore, we do another pass with one * less node in the count. This will push zone_movable_pfn[nid] further @@ -593,6 +626,10 @@ static int __meminit __early_pfn_to_nid(unsigned long pfn, if (state->last_start <= pfn && pfn < state->last_end) return state->last_nid; + /* IAMROOT20 20240727 + * pfn에 해당하는 memblock.memory 영역을 찾아 nid를 return + * - start_pfn, end_pfn을 업데이트 + */ nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); if (nid != NUMA_NO_NODE) { state->last_start = start_pfn; @@ -658,6 +695,11 @@ static inline bool __meminit early_page_initialised(unsigned long pfn) * Returns true when the remaining initialisation should be deferred until * later in the boot cycle when it can be parallelised. */ +/* IAMROOT20 20241005 + * 나중에 처리할 pfn을 구해서 first_deferred_pfn에 저장 + * - node의 마지막 zone(보통은 ZONE_NORMAL)일 때만, + * 처음 한 SECTION(128M)을 초기화하고 나머지는 나중에 처리(defer) + */ static bool __meminit defer_init(int nid, unsigned long pfn, unsigned long end_pfn) { @@ -674,7 +716,12 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) nr_initialised = 0; } + /* IAMROOT20_END 20240921 */ /* Always populate low zones for address-constrained allocations */ + /* IAMROOT20 20240928 + * zone end_pfn < node end_pfn -> return false + * node에서 마지막 zone(ZONE_NORMAL)의 경우에만, if문을 통과함 + */ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; @@ -684,6 +731,10 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) * We start only with one section of pages, more pages are added as * needed until the rest of deferred pages are initialized. 
*/ + /* IAMROOT20 20240928 + * defer init이 필요한 page(pfn) 개수가 PAGES_PER_SECTION(0x8000)을 넘으면, + * SECTION 단위로 정렬되어 있는지 확인하고 first_deferred_pfn에 저장 + */ nr_initialised++; if ((nr_initialised > PAGES_PER_SECTION) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { @@ -804,6 +855,11 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * zone/node above the hole except for the trailing pages in the last * section that will be appended to the zone/node below. */ +/* IAMROOT20 20241012 + * spfn : hole의 시작 주소 + * epfn : hole의 끝 주소 + * pgcnt : hole로 설정할 수 있는 valid한 page의 개수 + */ static void __init init_unavailable_range(unsigned long spfn, unsigned long epfn, int zone, int node) @@ -816,6 +872,7 @@ static void __init init_unavailable_range(unsigned long spfn, pfn = pageblock_end_pfn(pfn) - 1; continue; } + /* IAMROOT20_END 20241019 */ __init_single_page(pfn_to_page(pfn), pfn, zone, node); __SetPageReserved(pfn_to_page(pfn)); pgcnt++; @@ -870,13 +927,18 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone * function. They do not exist on hotplugged memory. */ if (context == MEMINIT_EARLY) { + /* IAMROOT20 20240921 + * mirrored_kernelcore이고 ZONE_MOVABLE인 경우만 true + */ if (overlap_memmap_init(zone, &pfn)) continue; + /* IAMROOT20_START 20241005 */ if (defer_init(nid, pfn, zone_end_pfn)) { deferred_struct_pages = true; break; } } + /* IAMROOT20_END 20240928 */ page = pfn_to_page(pfn); __init_single_page(page, pfn, zone, nid); @@ -888,6 +950,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone * such that unmovable allocations won't be scattered all * over the place during system boot. 
*/ + /* IAMROOT20 20241005 + * pageblock 단위(512개 page)로 migratetype을 지정 + */ if (pageblock_aligned(pfn)) { set_pageblock_migratetype(page, migratetype); cond_resched(); @@ -905,6 +970,11 @@ static void __init memmap_init_zone_range(struct zone *zone, unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages; int nid = zone_to_nid(zone), zone_id = zone_idx(zone); + /* IAMROOT20_START 20240928 */ + /* IAMROOT20 20240928 + * start_pfn ~ end_pfn + * - 각 memory pfn range에서 zone pfn range로 clamp 된 영역 + */ start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn); end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn); @@ -913,6 +983,7 @@ static void __init memmap_init_zone_range(struct zone *zone, memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn, zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); + /* IAMROOT20_END 20241005 */ /* IAMROOT20_START 20241012 */ if (*hole_pfn < start_pfn) init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid); @@ -1113,18 +1184,31 @@ static void __init adjust_zone_range_for_zone_movable(int nid, /* Only adjust if ZONE_MOVABLE is on this node */ if (zone_movable_pfn[nid]) { /* Size ZONE_MOVABLE */ + /* IAMROOT20 20240824 + * movable_zone = ZONE_NORMAL + */ if (zone_type == ZONE_MOVABLE) { *zone_start_pfn = zone_movable_pfn[nid]; *zone_end_pfn = min(node_end_pfn, arch_zone_highest_possible_pfn[movable_zone]); /* Adjust for ZONE_MOVABLE starting within this range */ + /* IAMROOT20 20240824 + * zone_type이 ZONE_MOVABLE이 아니고, + * zone_start_pfn < zone_movable_pfn < zone_end_pfn 인 경우 + * -> movable zone을 분리한다 + */ } else if (!mirrored_kernelcore && *zone_start_pfn < zone_movable_pfn[nid] && *zone_end_pfn > zone_movable_pfn[nid]) { *zone_end_pfn = zone_movable_pfn[nid]; /* Check if this whole range is within ZONE_MOVABLE */ + /* IAMROOT20 20240824 + * zone_type이 ZONE_MOVABLE이 아닌데, ZONE_MOVABLE에 포함되는 경우 + * (zone_start_pfn > zone_movable_pfn) + * -> 해당 영역 size를 0으로 만듦 + */ } else if (*zone_start_pfn >= 
zone_movable_pfn[nid]) *zone_start_pfn = *zone_end_pfn; } @@ -1234,11 +1318,17 @@ static unsigned long __init zone_spanned_pages_in_node(int nid, /* Get the start and end of the zone */ *zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); *zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); + /* IAMROOT20 20240824 + * ZONE_MOVABLE을 고려하여 zone_start_pfn, zone_end_pfn을 조정한다 + */ adjust_zone_range_for_zone_movable(nid, zone_type, node_start_pfn, node_end_pfn, zone_start_pfn, zone_end_pfn); /* Check that this node has pages within the zone's required range */ + /* IAMROOT20 20240824 + * zone이 node 영역을 벗어난 경우, return 0 + */ if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn) return 0; @@ -1263,11 +1353,17 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, unsigned long spanned, absent; unsigned long size, real_size; + /* IAMROOT20 20240824 + * node 영역 안에 있는 zone영역을 구함(hole 포함) + */ spanned = zone_spanned_pages_in_node(pgdat->node_id, i, node_start_pfn, node_end_pfn, &zone_start_pfn, &zone_end_pfn); + /* IAMROOT20 20240824 + * (node 영역 안에 있는) zone 영역에서 hole의 크기를 구함 + */ absent = zone_absent_pages_in_node(pgdat->node_id, i, node_start_pfn, node_end_pfn); @@ -1307,6 +1403,10 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, * populated regions may not be naturally aligned on page boundary. * So the (present_pages >> 4) heuristic is a tradeoff for that. */ + /* IAMROOT20 20240824 + * CONFIG_SPARSEMEM이고 spanned_pages가 present_pages 보다 1/16(present_pages >> 4, 6.25%)을 초과하여 큰 경우, + * pages를 present_pages로 변경 + */ if (spanned_pages > present_pages + (present_pages >> 4) && IS_ENABLED(CONFIG_SPARSEMEM)) pages = present_pages; @@ -1407,6 +1507,13 @@ void __meminit init_currently_empty_zone(struct zone *zone, * round what is now in bits to nearest long in bits, then return it in * bytes. */ +/* IAMROOT20 20240907 + * zone_start_pfn이 페이지 블록 경계에 맞도록 조정한 값이 zonesize. + * zonesize를 바탕으로 페이지 블록(pageblock) 개수를 구한 다음, 페이지 블록당 4bit(NR_PAGEBLOCK_BITS)를 곱해 + * usemap을 담을 공간을 bit 단위로 계산한다. 
+ * 그리고 8 bytes(64) 단위로 올림하여, usemap을 담을 공간을 넉넉하게 계산하고, + * 이를 bytes 단위로 변환 후, 그 갯수를 반환한다. + */ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize) { unsigned long usemapsize; @@ -1420,6 +1527,10 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l return usemapsize / 8; } +/* IAMROOT20 20240907 + * __ref : .ref.text 섹션에 함수를 만들되, inline이 안되게 한다. + * 커널 초기화 이후에도 참조될 수 있게 된다. + */ static void __ref setup_usemap(struct zone *zone) { unsigned long usemapsize = usemap_size(zone->zone_start_pfn, @@ -1543,6 +1654,9 @@ static void __init free_area_init_core(struct pglist_data *pgdat) * is used by this zone for memmap. This affects the watermark * and per-cpu initialisations */ + /* IAMROOT20 20240824 + * 현재 zone의 모든 page의 디스크립터(struct page) 크기의 합을 구함(page 단위) + */ memmap_pages = calc_memmap_size(size, freesize); if (!is_highmem_idx(j)) { if (freesize >= memmap_pages) { @@ -1555,12 +1669,19 @@ static void __init free_area_init_core(struct pglist_data *pgdat) zone_names[j], memmap_pages, freesize); } + /* IAMROOT20_END 20240824 */ /* IAMROOT20_START 20240831 */ /* Account for reserved pages */ if (j == 0 && freesize > dma_reserve) { freesize -= dma_reserve; pr_debug(" %s zone: %lu pages reserved\n", zone_names[0], dma_reserve); } + /* IAMROOT20 20240831 + * 위에서 freesize를 모두 계산하고, + * highmem이 아닌 zone인 경우, 전역 변수 nr_kernel_pages에 freesize를 더한다. + * + * memmap한 페이지와 dma_reserve를 제외한 freesize를 nr_all_pages에 더한다. 
+ */ if (!is_highmem_idx(j)) nr_kernel_pages += freesize; /* Charge for highmem memmap if there are enough kernel pages */ @@ -1578,6 +1699,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat) if (!size) continue; + /* IAMROOT20_END 20240831 */ /* IAMROOT20_START 20240907 */ set_pageblock_order(); setup_usemap(zone); init_currently_empty_zone(zone, zone->zone_start_pfn, size); @@ -1708,13 +1830,21 @@ static void __init free_area_init_node(int nid) calculate_node_totalpages(pgdat, start_pfn, end_pfn); + /* IAMROOT20 20240824 + * FLATMEM인 경우여서 분석x + */ alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); + /* IAMROOT20_END 20240907 */ /* IAMROOT20_START 20240921 */ free_area_init_core(pgdat); lru_gen_init_pgdat(pgdat); } +/* IAMROOT20 20240921 + * N_MEMORY : 일반적인 물리 메모리 블록. + * N_NORMAL_MEMORY : CPU에서 접근 가능한 물리 메모리 블록. DMA는 제외. + */ /* Any regular or high memory on that node ? */ static void check_for_memory(pg_data_t *pgdat, int nid) { @@ -1740,6 +1870,7 @@ void __init setup_nr_node_ids(void) { unsigned int highest; + /* IAMROOT20_END 20240629 */ /* IAMROOT20_START 20240706 */ highest = find_last_bit(node_possible_map.bits, MAX_NUMNODES); nr_node_ids = highest + 1; } @@ -1838,6 +1969,10 @@ void __init free_area_init(unsigned long *max_zone_pfn) * enable future "sub-section" extensions of the memory map. 
*/ pr_info("Early memory node ranges\n"); + /* IAMROOT20 20240824 + * 모든 mem range를 돌면서, + * subsection 단위(2M)로 mem_section->usage->subsection_map에 set + */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, (u64)start_pfn << PAGE_SHIFT, @@ -1851,6 +1986,10 @@ void __init free_area_init(unsigned long *max_zone_pfn) for_each_node(nid) { pg_data_t *pgdat; + /* IAMROOT20 20240824 + * node online이 아닌 경우 -> memory-less node로 설정 + * node online인 경우 -> N_MEMORY 설정, check_for_memory() 호출 + */ if (!node_online(nid)) { pr_info("Initializing node %d as memoryless\n", nid); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 47421bedc12b7..ca544eb5f7193 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -377,10 +377,20 @@ int watermark_scale_factor = 10; bool mirrored_kernelcore __initdata_memblock; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ +/* IAMROOT20 20240810 + * movable_zone = ZONE_NORMAL + * find_usable_zone_for_movable()에서 설정됨. 
+ */ int movable_zone; EXPORT_SYMBOL(movable_zone); #if MAX_NUMNODES > 1 +/* IAMROOT20 20240525 + * nr_node_ids = 16 + */ +/* IAMROOT20 20240824 + * setup_nr_node_ids() + */ unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; unsigned int nr_online_nodes __read_mostly = 1; EXPORT_SYMBOL(nr_node_ids); @@ -434,6 +444,9 @@ static inline unsigned long *get_pageblock_bitmap(const struct page *page, static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM + /* IAMROOT20 20241005 + * 한 SECTION(128M) 안에서 몇 번 째 pfn 인지를 구함 + */ pfn &= (PAGES_PER_SECTION-1); #else pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn); @@ -5928,6 +5941,18 @@ void __ref build_all_zonelists(pg_data_t *pgdat) #endif } +/* IAMROOT20 20240831 + * zone_managed_pages = 4GB로 가정할 경우, + * batch = min(a ^ 32 >> 10, 2 ^ 20 / 2 ^ 12) + * batch = min(2048, 256) + * batch = 256 + * batch = 64 (batch /= 4) + * batch = rounddown_pow_of_two(64 + 32) - 1 + * (1UL << (fls(96) - 1)) - 1 + * (1UL << (7 - 1)) - 1 + * (64 - 1) + * batch = 63 + */ static int zone_batchsize(struct zone *zone) { #ifdef CONFIG_MMU diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 10d73a0dfcec7..beed1ad1e0b91 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -316,6 +316,9 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, pud_t *pud; pmd_t *pmd; + /* IAMROOT20 20240727 + * PMD size(2M)만큼 증가시키면서 pgd, p4d, pud, pmd populate + */ for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -352,6 +355,10 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, } } else if (vmemmap_check_pmd(pmd, node, addr, next)) continue; + /* IAMROOT20 20240727 + * PMD size로 mapping을 실패한 경우, + * page size로 mapping을 시도한다 + */ if (vmemmap_populate_basepages(addr, next, node, altmap)) return -ENOMEM; } @@ -450,6 +457,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, unsigned 
long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { + /* IAMROOT20 20240727 + * start : pfn이 가리키는 page 구조체 + * end : 마지막 page 구조체 + */ unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); int r; diff --git a/mm/sparse.c b/mm/sparse.c index c2afdb26039e5..69df3b181c695 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -38,6 +38,9 @@ EXPORT_SYMBOL(mem_section); * node the page belongs to. */ #if MAX_NUMNODES <= 256 +/* IAMROOT20 20240720 + * section_to_node_table[] : section -> node 매핑 테이블 + */ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #else static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; @@ -81,6 +84,9 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) static int __meminit sparse_index_init(unsigned long section_nr, int nid) { + /* IAMROOT20 20240720 + * root는 *mem_section 배열에서의 index + */ unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; @@ -94,6 +100,10 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) if (mem_section[root]) return 0; + /* IAMROOT20 20240720 + * mem_section[root]가 초기화되어 있지 않으면, 메모리를 할당하고(section) + * mem_section[root]에 저장한다(mem_section[root] = section) + */ section = sparse_index_alloc(nid); if (!section) return -ENOMEM; @@ -115,6 +125,9 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) * node. This keeps us from having to use another data structure. The * node information is cleared just before we store the real mem_map. 
*/ +/* IAMROOT20 20240720 + * SECTION_NID_SHIFT 4 + */ static inline unsigned long sparse_encode_early_nid(int nid) { return ((unsigned long)nid << SECTION_NID_SHIFT); @@ -129,6 +142,12 @@ static inline int sparse_early_nid(struct mem_section *section) static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, unsigned long *end_pfn) { + /* IAMROOT20 20240720 + * MAX_PHYSMEM_BITS = 48 + * PAGE_SHIFT = 12 + * + * max_sparsemem_pfn = 1 << 36 = 0x10_0000_0000 + */ unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); /* @@ -185,6 +204,9 @@ static inline unsigned long first_present_section_nr(void) static void subsection_mask_set(unsigned long *map, unsigned long pfn, unsigned long nr_pages) { + /* IAMROOT20 20240824 + * subsection의 start, end 인덱스를 구해서, subsection_map에 set + */ int idx = subsection_map_index(pfn); int end = subsection_map_index(pfn + nr_pages - 1); @@ -203,9 +225,16 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) struct mem_section *ms; unsigned long pfns; + /* IAMROOT20 20240824 + * pfns : 현재 section의 pfn 수 + * pfn : 현재 section의 start pfn + */ pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); ms = __nr_to_section(nr); + /* IAMROOT20 20240824 + * subsection 단위로(2M) mem_section->usage->subsection_map에 표시 + */ subsection_mask_set(ms->usage->subsection_map, pfn, pfns); pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, @@ -231,6 +260,16 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en if (unlikely(!mem_section)) { unsigned long size, align; + /* IAMROOT20 20240720 + * ex) 4K, PA=48인 경우, + * NR_SECTION_ROOTS = 2^13(8192) + * + * INTERNODE_CACHE_SHIFT = 6 + * align = 64 + */ + /* IAMROOT20 20240809 + * size = sizeof(struct mem_section *) * NR_SECTION_ROOTS = 8 * 8192 = 65536 (포인터 크기 8 bytes 기준) + */ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_alloc(size, align); @@ -240,8 +279,14 @@ static void __init 
memory_present(int nid, unsigned long start, unsigned long en } #endif + /* IAMROOT20 20240720 + * PAGE_SECTION_MASK = 0xFFFF_FFFF_FFFF_8000 + */ start &= PAGE_SECTION_MASK; mminit_validate_memmodel_limits(&start, &end); + /* IAMROOT20 20240720 + * section 크기(PAGES_PER_SECTION, 128MB default)단위로 for문 수행 + */ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); struct mem_section *ms; @@ -250,6 +295,10 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en set_section_nid(section, nid); ms = __nr_to_section(section); + /* IAMROOT20 20240720 + * 부팅 초기에는 section_mem_map의 하위 몇 비트를 + * nid와 section status를 표시하는 데 사용 + */ if (!ms->section_mem_map) { ms->section_mem_map = sparse_encode_early_nid(nid) | SECTION_IS_ONLINE; @@ -302,17 +351,32 @@ static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, struct mem_section_usage *usage, unsigned long flags) { + /* IAMROOT20 20240803 + * SECTION_MAP_MASK = 0xFFFF_FFFF_FFFF_FFF0 + */ ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | SECTION_HAS_MEM_MAP | flags; ms->usage = usage; } +/* IAMROOT20 20240727 + * SECTION_BLOCKFLAGS_BITS 256 + * BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) = 4 + * + * usemap_size = 4 * 8 = 32 bytes + */ static unsigned long usemap_size(void) { return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } +/* IAMROOT20 20240727 + * sizeof(struct mem_section_usage) = 8 + * usemap_size() = 32 + * + * -> mem_section_usage.pageblock_flags[4]가 되는 효과 + */ size_t mem_section_usage_size(void) { return sizeof(struct mem_section_usage) + usemap_size(); @@ -346,6 +410,13 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. 
*/ + /* IAMROOT20 20240727 + * PAGE_SECTION_MASK << PAGE_SHIFT = 0xFFFF_FFFF_F800_0000 + * + * pgdat과 usemap이 다른 section에 있으면 상호 의존성이 생겨 다른 섹션이 + * usemap을 참조하고 있는 동안 해당 섹션을 제거할 수 없게 함 + * 이를 방지하기 위해서 pgdat과 usemap을 동일한 섹션에 할당 + */ goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); @@ -373,8 +444,16 @@ static void __init check_usemap_section_nr(int nid, old_pgdat_snr = NR_MEM_SECTIONS; } + /* IAMROOT20 20240727 + * usage 물리 주소를 PAGE_SHIFT 만큼 옮기게 되면 pfn이 되고, + * 이 값을 가지고 section 넘버를 구한다 + */ usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); + /* IAMROOT20 20240803 + * usemap, node data(pgdat)이 다른 section에 할당된 경우 + * if문에서 return하지 않고 다음 코드로 넘어감 + */ if (usemap_snr == pgdat_snr) return; @@ -385,6 +464,11 @@ static void __init check_usemap_section_nr(int nid, old_usemap_snr = usemap_snr; old_pgdat_snr = pgdat_snr; + /* IAMROOT20_END 20240727 *//* IAMROOT20_START 20240803 */ + /* IAMROOT20 20240803 + * usemap이 할당된 nid, 현재 메모리의 nid가 다른 경우 + * usemap, node data(pgdat)이 할당된 section도 다름 + */ usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); if (usemap_nid != nid) { pr_info("node %d must be removed before remove section %ld\n", @@ -397,6 +481,10 @@ static void __init check_usemap_section_nr(int nid, * gather other removable sections for dynamic partitioning. * Just notify un-removable section's number here. 
*/ + /* IAMROOT20 20240803 + * usemap이 할당된 nid, 현재 메모리의 nid가 같은 경우 + * usemap, node data(pgdat)이 할당된 section은 다름 + */ pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n", usemap_snr, pgdat_snr, nid); } @@ -415,6 +503,12 @@ static void __init check_usemap_section_nr(int nid, #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_SPARSEMEM_VMEMMAP +/* IAMROOT20 20240727 + * sizeof(struct page) = 64(default) + * PAGES_PER_SECTION = 1 << 15 + * + * -> 64 * (1<<15) = 0x20_0000(2M) + */ static unsigned long __init section_map_size(void) { return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); @@ -457,6 +551,9 @@ static inline void __meminit sparse_buffer_free(unsigned long size) static void __init sparse_buffer_init(unsigned long size, int nid) { + /* IAMROOT20 20240727 + * MAX_DMA_ADDRESS = PAGE_OFFSET(0xffff_0000_0000_0000) + */ phys_addr_t addr = __pa(MAX_DMA_ADDRESS); WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */ /* @@ -511,12 +608,22 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum; struct page *map; + /* IAMROOT20 20240727 + * usemap 메모리 할당 + */ usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), mem_section_usage_size() * map_count); if (!usage) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } + /* IAMROOT20 20240727 + * section_map_size() : 2M + * + * page 구조체 배열 메모리 할당 + * - map_count * section_map_size() 크기 만큼 memblock에서 메모리를 할당 + * - sparsemap_buf가 시작 주소를 가리킴 + */ sparse_buffer_init(map_count * section_map_size(), nid); for_each_present_section_nr(pnum_begin, pnum) { unsigned long pfn = section_nr_to_pfn(pnum); @@ -534,10 +641,16 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, goto failed; } check_usemap_section_nr(nid, usage); + /* IAMROOT20 20240803 + * mem_section 구조체 필드들(section_mem_map, usage) 설정 + */ sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, 
SECTION_IS_EARLY); usage = (void *) usage + mem_section_usage_size(); } + /* IAMROOT20 20240803 + * sparsemap_buf에서 사용하지 않고 남아있는 메모리를 free + */ sparse_buffer_fini(); return; failed: @@ -569,6 +682,7 @@ void __init sparse_init(void) /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ set_pageblock_order(); + /* IAMROOT20_END 20240720 *//* IAMROOT20_START 20240727 */ for_each_present_section_nr(pnum_begin + 1, pnum_end) { int nid = sparse_early_nid(__nr_to_section(pnum_end)); @@ -577,6 +691,9 @@ void __init sparse_init(void) continue; } /* Init node with sections in range [pnum_begin, pnum_end) */ + /* IAMROOT20 20240803 + * 같은 node에 속한 section들을 초기화 + */ sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count); nid_begin = nid; pnum_begin = pnum_end; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1d13d71687d73..23be64097136c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2410,6 +2410,9 @@ void __init vm_area_add_early(struct vm_struct *vm) struct vm_struct *tmp, **p; BUG_ON(vmap_initialized); + /* IAMROOT20 20240420 + * virt addr이 작은 것부터 큰것 순서대로 삽입 + */ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { if (tmp->addr >= vm->addr) { BUG_ON(tmp->addr < vm->addr + vm->size); diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c index 20c6415b9ced1..aebacc3fe1cb2 100644 --- a/scripts/dtc/libfdt/fdt.c +++ b/scripts/dtc/libfdt/fdt.c @@ -96,6 +96,10 @@ int fdt_check_header(const void *fdt) if (fdt_magic(fdt) != FDT_MAGIC) return -FDT_ERR_BADMAGIC; + /* IAMROOT20 20240224 + * can_assume : DTB에 운용자가 값을 설정하지 않는 경우 false, + * 그렇지 않은 경우에는 true를 반환한다. 
+ */ if (!can_assume(LATEST)) { if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) || (fdt_last_comp_version(fdt) > @@ -139,6 +143,10 @@ int fdt_check_header(const void *fdt) const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) { + /* IAMROOT20 20240511 + * uoffset : structure block 내에서의 offset + * absoffset : fdt 시작 주소로부터 offset + */ unsigned int uoffset = offset; unsigned int absoffset = offset + fdt_off_dt_struct(fdt); @@ -156,9 +164,16 @@ const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) || ((offset + len) > fdt_size_dt_struct(fdt))) return NULL; + /* IAMROOT20 20240511 + * fdt + structure block offset + structure block 내에서의 offset + */ return fdt_offset_ptr_(fdt, offset); } +/* IAMROOT20 20240511 + * startoffset이 가리키는 tag를 반환 + * nextoffset : 다음 tag의 offset을 저장 + */ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) { const fdt32_t *tagp, *lenp; @@ -167,6 +182,10 @@ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) const char *p; *nextoffset = -FDT_ERR_TRUNCATED; + /* IAMROOT20 20240511 + * 현재 offset이 가리키는 위치의 주소를 반환 + * - fdt + 'structure block offset' + 'structure block 내에서의 offset' + */ tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); if (!can_assume(VALID_DTB) && !tagp) return FDT_END; /* premature end */ @@ -220,9 +239,17 @@ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) return tag; } +/* IAMROOT20 20240518 + * offset을 그대로 return + * - offset이 유효한지 확인, 현재 offset의 tag가 FDT_BEGIN_NODE인 지 확인 + */ int fdt_check_node_offset_(const void *fdt, int offset) { if (!can_assume(VALID_INPUT) + /* IAMROOT20 20240511 + * FDT_TAGSIZE = sizeof(fdt32_t) = 4 + * - offset % FDT_TAGSIZE : 4바이트 단위로 정렬이 되어 있지 않는 경우 + */ && ((offset < 0) || (offset % FDT_TAGSIZE))) return -FDT_ERR_BADOFFSET; @@ -232,6 +259,10 @@ int fdt_check_node_offset_(const void *fdt, int offset) return offset; } +/* IAMROOT20 20240518 + * offset을 그대로 return + * - offset이 유효한지 확인, 현재 offset의 
tag가 FDT_PROP인지 확인 + */ int fdt_check_prop_offset_(const void *fdt, int offset) { if (!can_assume(VALID_INPUT) @@ -244,6 +275,9 @@ int fdt_check_prop_offset_(const void *fdt, int offset) return offset; } +/* IAMROOT20 20240511 + * offset(현재 node)이 가리키는 위치의 다음 node의 offset를 반환 + */ int fdt_next_node(const void *fdt, int offset, int *depth) { int nextoffset = 0; @@ -268,14 +302,27 @@ int fdt_next_node(const void *fdt, int offset, int *depth) break; case FDT_END_NODE: + /* IAMROOT20 20240511 + * (*depth) < 0 인 경우 + * -> 부모 노드로 올라가는 경우이므로, + * 여기서 nextoffset을 return하여 하위 노드만 돌도록 함 + * (부모 노드로 올라가지 못하도록) + */ if (depth && ((--(*depth)) < 0)) return nextoffset; break; case FDT_END: + /* IAMROOT20 20240511 + * fdt 끝(END) 인 경우 또는 + * nextoffset == -FDT_ERR_TRUNCATED 인 경우 + */ if ((nextoffset >= 0) || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) return -FDT_ERR_NOTFOUND; + /* IAMROOT20 20240511 + * nextoffset == -FDT_ERR_BADSTRUCTURE 일 경우 + */ else return nextoffset; } diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c index 9f6c551a22c2f..5f4f1b41b8421 100644 --- a/scripts/dtc/libfdt/fdt_ro.c +++ b/scripts/dtc/libfdt/fdt_ro.c @@ -31,6 +31,10 @@ static int fdt_nodename_eq_(const void *fdt, int offset, return 0; } +/* IAMROOT20 20240518 + * string block 안에서 stroffset이 가리키는 위치의 string(name)을 return + * *lenp : name의 길이를 저장 + */ const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) { int32_t totalsize; @@ -52,6 +56,10 @@ const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) goto fail; err = -FDT_ERR_BADOFFSET; + /* IAMROOT20 20240518 + * absoffset : fdt 안에서 stroffset이 가리키는 위치까지 offset + * - stroffset : string block 안에서의 offset + */ absoffset = stroffset + fdt_off_dt_strings(fdt); if (absoffset >= (unsigned)totalsize) goto fail; @@ -177,6 +185,9 @@ int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) const struct fdt_reserve_entry *re; FDT_RO_PROBE(fdt); + /* IAMROOT20 20240406 + * fdt의 memory 
reservation block에서 n번째 위치의 fdt_reserve_entry 포인터를 가져옴 + */ re = fdt_mem_rsv(fdt, n); if (!can_assume(VALID_INPUT) && !re) return -FDT_ERR_BADOFFSET; @@ -198,6 +209,9 @@ int fdt_num_mem_rsv(const void *fdt) return -FDT_ERR_TRUNCATED; } +/* IAMROOT20 20240518 + * 현재 offset 위치에서 다음 FDT_PROP tag의 offset을 return + */ static int nextprop_(const void *fdt, int offset) { uint32_t tag; @@ -274,7 +288,7 @@ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) const char *q; while (*p == '/') { - p++; + p++; if (p == end) return offset; } @@ -297,6 +311,10 @@ int fdt_path_offset(const void *fdt, const char *path) return fdt_path_offset_namelen(fdt, path, strlen(path)); } +/* IAMROOT20 20240518 + * 현재 nodeoffset 위치의 node name 주소를 return + * *len : node name의 길이를 저장 + */ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) { const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset); @@ -315,6 +333,10 @@ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) * give only the leaf name (after all /). The actual tree * contents are loosely checked. 
*/ + /* IAMROOT20 20240518 + * ex) nameptr = "/memory/gpu" + * ^-- leaf + */ const char *leaf; leaf = strrchr(nameptr, '/'); if (leaf == NULL) { @@ -353,6 +375,10 @@ int fdt_next_property_offset(const void *fdt, int offset) return nextprop_(fdt, offset); } +/* IAMROOT20 20240518 + * offset이 가리키는 위치의 fdt_property 포인터 return + * *lenp : property value 길이 저장 + */ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, int offset, int *lenp) @@ -367,6 +393,9 @@ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, return NULL; } + /* IAMROOT20 20240518 + * prop : offset이 가리키는 위치의 가상 '주소' + */ prop = fdt_offset_ptr_(fdt, offset); if (lenp) @@ -466,6 +495,11 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, return prop->data; } +/* IAMROOT20 20240518 + * offset이 가리키는 property value(prop->data)를 return + * *namep : property name 저장 + * *lenp : name 길이 저장 + */ const void *fdt_getprop_by_offset(const void *fdt, int offset, const char **namep, int *lenp) { diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h index 77ccff19911ef..44da80cf01999 100644 --- a/scripts/dtc/libfdt/libfdt.h +++ b/scripts/dtc/libfdt/libfdt.h @@ -142,6 +142,9 @@ static inline uint32_t fdt32_ld(const fdt32_t *p) { const uint8_t *bp = (const uint8_t *)p; + /* IAMROOT20 20231216 + * 0xedfe0dd0 -> 0xd00dfeed + */ return ((uint32_t)bp[0] << 24) | ((uint32_t)bp[1] << 16) | ((uint32_t)bp[2] << 8) @@ -246,6 +249,10 @@ int fdt_next_subnode(const void *fdt, int offset); /**********************************************************************/ #define fdt_get_header(fdt, field) \ (fdt32_ld(&((const struct fdt_header *)(fdt))->field)) +/* IAMROOT20 20231216 + * fdt_magic -> fdt32_ld(&fdt->magic) + * fdt_totalsize -> fdt32_ld(&fdt->totalsize) + */ #define fdt_magic(fdt) (fdt_get_header(fdt, magic)) #define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) #define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) diff 
--git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 68d19632aeb72..d4d62f904500a 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -71,6 +71,16 @@ static int lockdown_is_locked_down(enum lockdown_reason what) return 0; } +/* IAMROOT20 20240203 + * static struct security_hook_list lockdown_hooks[] __ro_after_init = { + * { + * .head = &security_hook_heads.locked_down, + * .hook = { + * .locked_down = lockdown_is_locked_down + * } + * } + * } + */ static struct security_hook_list lockdown_hooks[] __ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), }; @@ -157,6 +167,13 @@ static int __init lockdown_secfs_init(void) core_initcall(lockdown_secfs_init); +/* IAMROOT20 20240203 + * ex) DEFINE_EARLY_LSM(lockdown) = { + .name = "lockdown", + .init = lockdown_lsm_init, + }; + static struct lsm_info __early_lsm_lockdown __used __section(".early_lsm_info.init") __aligned(sizeof(unsigned long)) + */ #ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY DEFINE_EARLY_LSM(lockdown) = { #else diff --git a/security/security.c b/security/security.c index d5ff7ff45b776..6bb0b34bacb1e 100644 --- a/security/security.c +++ b/security/security.c @@ -75,6 +75,9 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", }; +/* IAMROOT20 20240203 + * security_hook_heads 선언부 + */ struct security_hook_heads security_hook_heads __ro_after_init; static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain); @@ -240,6 +243,9 @@ static void __init initialize_lsm(struct lsm_info *lsm) int ret; init_debug("initializing %s\n", lsm->name); + /* IAMROOT20 20240203 + * ex) lsm->init(): lockdown_lsm_init() + */ ret = lsm->init(); WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); } @@ -399,15 +405,27 @@ static void __init ordered_lsm_init(void) kfree(ordered_lsms); } +/* IAMROOT20 20240203 + * 
https://scienceon.kisti.re.kr/commons/util/originalView.do?cn=JAKO200311921893007&oCn=JAKO200311921893007&dbt=JAKO&journal=NJOU00291864 + * https://lesstif.gitbook.io/web-service-hardening/selinux + */ int __init early_security_init(void) { struct lsm_info *lsm; + /* IAMROOT20 20240203 + * security_hook_heads.binder_set_context_mgr.first = NULL; + * security_hook_heads.binder_transaction.first = NULL; + * ... + */ #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ INIT_HLIST_HEAD(&security_hook_heads.NAME); #include "linux/lsm_hook_defs.h" #undef LSM_HOOK + /* IAMROOT20 20240203 + * ex) lsm -> __early_lsm_lockdown + */ for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; @@ -522,6 +540,9 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count, int i; for (i = 0; i < count; i++) { + /* IAMROOT20 20240203 + * ex) lockdown_hooks[0].lsm = "lockdown" + */ hooks[i].lsm = lsm; hlist_add_tail_rcu(&hooks[i].list, hooks[i].head); } @@ -530,6 +551,9 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count, * Don't try to append during early_security_init(), we'll come back * and fix this up afterwards. 
*/ + /* IAMROOT20 20240203 + * early_security_init() 로직에서는 수행되지 않음 + */ if (slab_is_available()) { if (lsm_append(lsm, &lsm_names) < 0) panic("%s - Cannot get early memory.\n", __func__); diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 134f8eab0768f..e6c990f50a952 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -23,6 +23,10 @@ enum zone_type { #define pageblock_order MAX_ORDER #define pageblock_nr_pages BIT(pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +/* IAMROOT20 20241012 + * pageblock_nr_pages : 1024 + * pageblock_start_pfn : pfn 주소의 하위 10개의 비트를 0으로 설정 -> align down + */ #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) struct zone {