From 15a5449db88915c71065bd050799659113204f7a Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 9 Sep 2023 09:03:52 +0000 Subject: [PATCH 001/104] IAMROOT20 20230909 - Add comments to 'head.S' - create_idmap --- arch/arm64/kernel/head.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4936d8eace3b4..c58ee42a772c4 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -557,9 +557,14 @@ SYM_FUNC_START_LOCAL(create_idmap) mov x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region - /* IAMROOT20_END 20230826 */ + /* IAMROOT20_END 20230826 *//* IAMROOT20_START 20230909 */ /* Remap the FDT after the kernel image */ + /* IAMROOT20 20230909 + * 커널의 끝 주소를 2MB를 더해준 다음, 내림 연산을 하여 2MB 단위로 정렬한다. + * 매핑한 FDT의 물리 주소를 2MB 단위로 정렬한다. + * FDT의 영역을 4MB 크기로 잡아서, 물리 주소에 RW 속성으로 매핑한다. + */ adrp x1, _text adrp x22, _end + SWAPPER_BLOCK_SIZE bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 @@ -578,6 +583,8 @@ SYM_FUNC_START_LOCAL(create_idmap) cbnz x19, 0f // skip cache invalidation if MMU is on dmb sy + /* + * adrp x0, init_idmap_pg_dir adrp x1, init_idmap_pg_end bl dcache_inval_poc From 957809e07e6c3ab1252f82cb45a37ead24fa23dd Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 10 Sep 2023 10:23:11 +0900 Subject: [PATCH 002/104] IAMROOT20 20230909 - Add comments to 'head.S' - create_idmap - Remap the FDT --- arch/arm64/include/asm/assembler.h | 25 +++++++++++++++++++++++++ arch/arm64/kernel/head.S | 14 +++++++++++--- arch/arm64/mm/cache.S | 6 ++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 4081b8b685e97..7dbcb1bb827fe 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -436,7 +436,19 @@ alternative_endif * fixup: optional label to branch to on user fault * Corrupts: start, end, tmp */ + /* + * IAMROOT20 20230909: + * cache.S::dcache_clean_poc 
dcache_by_line_op cvac, sy, x0, x1, x2, x3 + * + */ .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + /* + * IAMROOT20 20230909: + * tmp = linesz - 1; tmp = 63 + * start = start & ~(tmp); // start을 캐쉬라인 사이즈로 align + * dcache_op@: + * dc civac start + */ sub \tmp, \linesz, #1 bic \start, \start, \tmp .Ldcache_op\@: @@ -457,11 +469,20 @@ alternative_endif .endif .endif .endif + /* + * IAMROOT20 20230909: + * start += linesz; start += 64 + * if(start < end) goto dcache_op@; + * dsb domain; dsb sy + */ add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain +/* + * IAMROOT20_END 20230909 + */ _cond_uaccess_extable .Ldcache_op\@, \fixup .endm @@ -669,6 +690,10 @@ alternative_endif orr \pte, \phys, \phys, lsr #36 and \pte, \pte, #PTE_ADDR_MASK #else + /* + * IAMROOT20 20230909: + * pte = phys; + */ mov \pte, \phys #endif .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c58ee42a772c4..d5e6416472166 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,11 @@ SYM_CODE_START(primary_entry) * of the primary boot code to the PoC so we can safely execute it with * the MMU off. */ + /* + * IAMROOT20 20230909: + * MMU가 ON 상태이면 __idmap_text_start부터 __idmap_text_end 까지 + * cache clean 한다. + */ cbz x19, 0f adrp x0, __idmap_text_start adr_l x1, __idmap_text_end @@ -465,6 +470,10 @@ SYM_FUNC_START_LOCAL(remap_region) // Get the index offset for the start of the last level table lsr x1, x1, x6 + /* + * IAMROOT20 20230909: + * bic x1, x1, 0x1ff // PAGE_SHIFT = 12일 경우. 
+ */ bfi x1, xzr, #0, #PAGE_SHIFT - 3 // Derive the start and end indexes into the last level table @@ -557,8 +566,9 @@ SYM_FUNC_START_LOCAL(create_idmap) mov x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region - /* IAMROOT20_END 20230826 *//* IAMROOT20_START 20230909 */ + /* IAMROOT20_END 20230826 */ + /* IAMROOT20_START 20230909 */ /* Remap the FDT after the kernel image */ /* IAMROOT20 20230909 * 커널의 끝 주소를 2MB를 더해준 다음, 내림 연산을 하여 2MB 단위로 정렬한다. @@ -583,8 +593,6 @@ SYM_FUNC_START_LOCAL(create_idmap) cbnz x19, 0f // skip cache invalidation if MMU is on dmb sy - /* - * adrp x0, init_idmap_pg_dir adrp x1, init_idmap_pg_end bl dcache_inval_poc diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 5d68510ca3fb0..e39fbd29f5e42 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -195,6 +195,12 @@ SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) * - end - virtual end address of region */ SYM_FUNC_START(__pi_dcache_clean_poc) +/* + * IAMROOT20 20230909: + * + * dcache_line_size x2, x3 ; x2 = 64 + * dcache_by_myline_op cvac, sy, x0, x1, x2, x3 + */ dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END(__pi_dcache_clean_poc) From b137f635e3757438c2b96695a57ac886830e9e0e Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 17 Sep 2023 13:46:42 +0900 Subject: [PATCH 003/104] IAMROOT20 20230916 - Add comments to 'head.S' IDmap Table changes based on the number of VA_BITS. 
--- arch/arm64/include/asm/assembler.h | 39 ++++++++++++++++++++++++-- arch/arm64/include/asm/pgtable-hwdef.h | 25 +++++++++++++++++ arch/arm64/kernel/head.S | 13 +++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7dbcb1bb827fe..d199275369d78 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -415,7 +415,28 @@ alternative_cb_end csel \tmp0, \tmp1, \tmp0, hi bfi \tcr, \tmp0, \pos, #3 .endm - + + /* IAMROOT20 20230916: + * __dcache_op_workaround_clean_cache cvac __idmap_text_start + * .pushsection .altinstructions, "a" + * // arm64/include/asm/alternative.h + * .word 661f - . + * .word 663f - . + * .hword ARM64_WORKAROUND_CLEAN_CACHE + * .byte 662f-661f + * .byte 664f-663f + * .popsection + * 661: + * dc cvac, __idmap_text_start + * 662: + * .subsection 1 + * 663: + * dc civac, __idmap_text_start + * 664: + * .org . - (664b-663b) + (662b-661b) + * .org . 
- (662b-661b) + (664b-663b) + * .previous + */ .macro __dcache_op_workaround_clean_cache, op, addr alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE dc \op, \addr @@ -439,7 +460,7 @@ alternative_endif /* * IAMROOT20 20230909: * cache.S::dcache_clean_poc dcache_by_line_op cvac, sy, x0, x1, x2, x3 - * + * exam) dcache_by_myline_op(cvac, sy, __idmap_text_start, __idmap_text_end, x2, x3) */ .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup /* @@ -498,7 +519,17 @@ alternative_endif * Corrupts: start, end, tmp1, tmp2 */ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + /* + * IAMROOT20 20230916: + * tmp1 = cache_line_size 예) 64 + */ dcache_line_size \tmp1, \tmp2 + /* + * IAMROOT20 20230916: + * exam) dcache_by_line_op cvac, sy, x0, x1, x2, x3 + * x2 = 64 + * dcache_by_myline_op cvac, sy, __idmap_text_start, __idmap_text_end, x2, x3 + */ dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup .endm @@ -687,6 +718,10 @@ alternative_endif * We assume \phys is 64K aligned and this is guaranteed by only * supporting this configuration with 64K pages. */ + /* + * IAMROOT20 20230916: + * pte = (phys | (phys >> 36) & 0x0000_ffff_ffff_f000); + */ orr \pte, \phys, \phys, lsr #36 and \pte, \pte, #PTE_ADDR_MASK #else diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f658aafc47dfa..4d1a475b0bfa2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -46,6 +46,12 @@ * PMD_SHIFT determines the size a level 2 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 2 + /* + * IAMROOT20 20230916: + * exam) 39 VA_BITS, 4k + * PMD_SHIFT 21 + * PMD_SIZE SZ_2M + */ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) @@ -66,6 +72,14 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). 
*/ + /* + * IAMROOT20 20230916: + * exam) VA_BITS==36 on 16k(14bit) CONFIG_PGTABLE_LEVELS = 2 + * PGDIR_SHIFT : 25 + * PGDIR_SIZE : 1 << 25 SZ_32M + * PGDIR_MASK : 0xffff_ffff_fe00_0000 + * PTRS_PER_PGD : 1 << 11 = 2048 (VA_BITS 36 - PGDIR_SHIFT 25 = 11) + */ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -155,8 +169,19 @@ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +/* + * IAMROOT20 20230916: + * CONFIG_ARM64_PA_BITS_52 일경우 + * PAGE_SHIFT 16 + * PTE_ADDR_LOW 0x0000_ffff_ffff_0000 ((1 << (48 - 16)) - 1) << 16 + */ #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 +/* + * IAMROOT20 20230916: + * PTE_ADDR_HIGH 0xf << 12 0xf000 + * PTE_ADDR_MASK 0x0000_ffff_ffff_f000 + */ #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) #define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #define PTE_ADDR_HIGH_SHIFT 36 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d5e6416472166..f476266245904 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -517,6 +517,15 @@ SYM_FUNC_START_LOCAL(create_idmap) * requires more than 47 or 48 bits, respectively. */ #if (VA_BITS < 48) + /* + * IAMROOT20 20230916: + * exam) VA_BITS=47 on 16k(14bit) + * IDMAP_PGD_ORDER = (47-36) = 11 + * EXTRA_SHIFT = (36 + 14 - 3) = 47 + * VA_BITS=39 on 4k(12bit) + * IDMAP_PGD_ORDER = (39-30) = 9 + * EXTRA_SHIFT = (30 + 12 - 3) = 39 + */ #define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) @@ -534,6 +543,8 @@ SYM_FUNC_START_LOCAL(create_idmap) /* IAMROOT20 20230805 * PGDIR_SHIFT : PGD의 인덱스를 가져오기 위해 shift 해야 하는 횟수. * IDMAP_PGD_ORDER : PGD의 사이즈를 의미한다. 
+ * exam) 9 : 48bit, 4k page : 48-39 + * exam) 10: 52bit, 64k : 52 - 42 */ #define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) #define EXTRA_SHIFT @@ -593,10 +604,12 @@ SYM_FUNC_START_LOCAL(create_idmap) cbnz x19, 0f // skip cache invalidation if MMU is on dmb sy + /* IAMROOT20_START 20230916 */ adrp x0, init_idmap_pg_dir adrp x1, init_idmap_pg_end bl dcache_inval_poc 0: ret x28 + /* IAMROOT20_END 20230916 */ SYM_FUNC_END(create_idmap) SYM_FUNC_START_LOCAL(create_kernel_mapping) From 91bce6f6ac2d2aab3e2f95f91a258721b3bfe52b Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 7 Oct 2023 13:30:07 +0000 Subject: [PATCH 004/104] IAMROOT20 20231007 - Add comments to 'head.S' & 'assembler.h' --- arch/arm64/include/asm/sysreg.h | 26 ++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 2 ++ 2 files changed, 28 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index eefd712f24303..a0bdde444313e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -585,10 +585,36 @@ #define ENDIAN_SET_EL1 0 #endif + +/* IAMROOT20 20231007 + * LSMAOE : Multiple Load/Store(AArch32) 명령어로 메모리 접근 시 인터럽트 허용 여부 설정 + * nTLSMD : Multiple Load/Store(AArch32) 명령어로 device 접근 시 fault 여부 설정 + * EIS : EL1의 context synchronizing event 여부에 따라 exception 설정 + * TSCXT : EL0의 SCXTNUM_EL0 접근 허용 여부 설정 + * EOS : EL1에서 반환되는 exception의 context synchronizing event 여부에 따라 비트 설정 +*/ #define INIT_SCTLR_EL1_MMU_OFF \ (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \ SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS) +/* IAMROOT20 20231007 + * M : 1단계 주소 변환(EL0/EL1)에 대한 MMU 활성화 여부 설정 + * C : EL0/EL1에서 노멀 메모리에 접근되는 모든 데이터, + * 혹은 1단계 변환 테이블에 접근되는 노멀 메모리에 대해 Cacheability 여부 설정 + * SA : EL1의 Stack Pointer 레지스터 얼라인먼트 체크 설정 + * SA0 : EL0의 Stack Pointer 레지스터 얼라인먼트 체크 설정 + * SED : AArch32의 EL0에서 SETEND 명령어 실행 허용 여부 설정 + * I : EL0/EL1에서 노멀 메모리에 접근하는 모든 명령어의 Cacheability 여부 설정 + * DZE : EL0에서 캐시를 제어하는 'DC ZVA' 명령어의 실행 허용 여부 설정 + * UCT : EL0에서 캐시의 동작을 
설정하는 CTR_EL0 레지스터 접근 여부 설정 + * nTWI : EL0에서 WFI 명령어 실행 허용 여부 설정 + * IESB : Implicit Error Synchronization event 허용 여부 설정 + * SPAN : EL1에서 exception 발생 시 Privileged Access Never 활성화 여부 설정 + * ITFSB : Tag Check Fault의 synchronized 여부 설정 + * UCI : EL0에서 캐시를 설정하는 명령어 + * (DC CVAU, DC CIVAC, DC CVAC, DC CVAP, IC IVAU) 실행 허용 여부 설정 + * EPAN : Privileged Access Never 강화 여부 설정 +*/ #define INIT_SCTLR_EL1_MMU_ON \ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f476266245904..d12d2d9f2f726 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -736,11 +736,13 @@ SYM_FUNC_END(__primary_switched) * * x0: whether we are being called from the primary boot path with the MMU on */ +/* IAMROOT20_START 20231007 */ SYM_FUNC_START(init_kernel_el) mrs x1, CurrentEL cmp x1, #CurrentEL_EL2 b.eq init_el2 +/* IAMROOT_END 20231007 */ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) mov_q x0, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround From e47817a6c32a0e1a4c0725765ef6ed435828947c Mon Sep 17 00:00:00 2001 From: SoominCho Date: Fri, 20 Oct 2023 15:06:21 +0000 Subject: [PATCH 005/104] IAMROOT20 20231014 - Add comments to 'head.S' --- arch/arm64/kernel/head.S | 5 ++++- arch/arm64/mm/proc.S | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d12d2d9f2f726..e0b233eb0e2e5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -742,7 +742,7 @@ SYM_FUNC_START(init_kernel_el) cmp x1, #CurrentEL_EL2 b.eq init_el2 -/* IAMROOT_END 20231007 */ +/* IAMROOT_END 20231007 */ /* IAMROOT20_START 20231014 */ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) mov_q x0, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround @@ -782,6 +782,9 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) * making it impossible to start in nVHE mode. Is that * compliant with the architecture? Absolutely not! 
*/ + /* IAMROOT20 20231014 + * 일부 CPU의 경우 E2H설정이 1로 고정되어 nVHE로 변경할 수 없다. + */ mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 1f diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2cb437821ca4..ced12af153d58 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -413,6 +413,7 @@ SYM_FUNC_START(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 + /* IAMROOT20_END 20231014 */ /* * Default values for VMSA control registers. These will be adjusted From 3f0926b26f9111e93ac6954eed50da225e150dd2 Mon Sep 17 00:00:00 2001 From: hyongwukim Date: Tue, 24 Oct 2023 23:16:50 +0900 Subject: [PATCH 006/104] IAMROOT20 20231021 - Add comments to 'head.S' & 'assembler.h' & 'proc.S' --- arch/arm64/include/asm/assembler.h | 9 +++++++++ arch/arm64/kernel/head.S | 3 +++ arch/arm64/mm/proc.S | 14 ++++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d199275369d78..421889aa4afb6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -392,6 +392,11 @@ alternative_cb_end * this number conveniently equals the number of leading zeroes in * the physical address of _end. 
*/ + /* IAMROOT20 20231021 + * reg <- 0x0000_0000_42c8_0000(_end) + * reg <- 0x0000_0000_42c8_0000 | (0x0001_0000_0000_0000 - 1) = 0x0000_0000_42c8_0000 | 0x0000_ffff_ffff_ffff = 0x0000_ffff_ffff_ffff + * reg = 16 (clz: MSB부터 0의 숫자를 카운트) + */ .macro idmap_get_t0sz, reg adrp \reg, _end orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 @@ -706,7 +711,11 @@ alternative_endif .macro phys_to_ttbr, ttbr, phys #ifdef CONFIG_ARM64_PA_BITS_52 orr \ttbr, \phys, \phys, lsr #46 + /* IAMROOT20 20231021 + * TTBR_BADDR_MASK_52 : 0x0000_ffff_ffff_fffc + */ and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 + /* IAMROOT20_END 20231021 */ #else mov \ttbr, \phys #endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index e0b233eb0e2e5..c803a6d8f3b02 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -931,6 +931,9 @@ SYM_FUNC_END(set_cpu_boot_mode_flag) SYM_FUNC_START(__enable_mmu) mrs x3, ID_AA64MMFR0_EL1 ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4 + /* IAMROOT20 20231021 + * MIN : 0x0, MAX : 0x7 + */ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN b.lt __no_granule_support cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index ced12af153d58..5c87ae847cf86 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -413,8 +413,11 @@ SYM_FUNC_START(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 - /* IAMROOT20_END 20231014 */ - + /* IAMROOT20_END 20231014 */ /* IAMROOT20_START 20231021 */ + /* IAMROOT20 20231021 + * mair(x17) : Memory Attribute Indirection Register + * tcr(x16) : Translation Control Register + */ /* * Default values for VMSA control registers. These will be adjusted * below depending on detected CPU features. 
@@ -429,10 +432,17 @@ SYM_FUNC_START(__cpu_setup) tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 + /* IAMROOT20 20231021 + * x0 = 48 || 52 + * x9 = 16 || 12 + */ sub x9, xzr, x0 add x9, x9, #64 tcr_set_t1sz tcr, x9 #else + /* IAMROOT20 20231021 + * x9 = 16 + */ idmap_get_t0sz x9 #endif tcr_set_t0sz tcr, x9 From 269f1a1e98302a183e0aab079503b56c65141129 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Tue, 31 Oct 2023 21:32:05 +0900 Subject: [PATCH 007/104] IAMROOT20 20231028 - Add comments --- arch/arm64/include/asm/assembler.h | 6 +++++- arch/arm64/kernel/head.S | 19 +++++++++++++++++++ arch/arm64/kernel/pi/kaslr_early.c | 15 ++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 421889aa4afb6..4e1eb8165492c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -695,6 +695,10 @@ alternative_endif #ifdef CONFIG_ARM64_VA_BITS_52 mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) + /* IAMROOT20 20231028 + * VA=52로 사용하는데, LVA(VA=52 support)가 지원되지 않는 시스템의 경우 + * VA=48로 운영하기 위해 PGD 테이블 위치를 offset만큼 더한 주소로 적용 + */ cbnz \tmp, .Lskipoffs_\@ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET .Lskipoffs_\@ : @@ -715,7 +719,7 @@ alternative_endif * TTBR_BADDR_MASK_52 : 0x0000_ffff_ffff_fffc */ and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 - /* IAMROOT20_END 20231021 */ + /* IAMROOT20_END 20231021 */ /* IAMROOT20_START 20231028 */ #else mov \ttbr, \phys #endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c803a6d8f3b02..6ef67d428aee2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -613,6 +613,9 @@ SYM_FUNC_START_LOCAL(create_idmap) SYM_FUNC_END(create_idmap) SYM_FUNC_START_LOCAL(create_kernel_mapping) + /* IAMROOT20 20231028 + * init_pg_dir에 대해 page table을 생성 + */ adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR // compile time __va(_text) #ifdef 
CONFIG_RELOCATABLE @@ -940,6 +943,9 @@ SYM_FUNC_START(__enable_mmu) b.gt __no_granule_support phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 + /* IAMROOT20 20231028 + * 임시로 reserved_pg_dir(내용이 모두 0인 page table)을 ttbr1에 load함 + */ load_ttbr1 x1, x1, x3 set_sctlr_el1 x0 @@ -1077,9 +1083,17 @@ SYM_FUNC_START_LOCAL(__primary_switch) and x23, x23, MIN_KIMG_ALIGN - 1 #ifdef CONFIG_RANDOMIZE_BASE mov x0, x22 + /* IAMROOT20 20231028 + * sp를 설정하여 C 함수를 호출할 수 있도록 함 + */ adrp x1, init_pg_end mov sp, x1 mov x29, xzr + /* IAMROOT20 20231028 + * RANDOMIZE_BASE가 Enable되어 있는 경우 + * 커널 이미지를 재배치 할 offset을 구함 + * KASLR(Kernel Address Space Layout Randomization) + */ bl __pi_kaslr_early_init and x24, x0, #SZ_2M - 1 // capture memstart offset seed bic x0, x0, #SZ_2M - 1 @@ -1089,8 +1103,13 @@ SYM_FUNC_START_LOCAL(__primary_switch) bl clear_page_tables bl create_kernel_mapping + /* IAMROOT20 20231028 + * __enable_mmu에서 임시로 reserved_pg_dir을 load했었는데, + * ttbr1에 init_pg_dir을 load함 + */ adrp x1, init_pg_dir load_ttbr1 x1, x1, x2 + /* IAMROOT20_END 20231028 */ #ifdef CONFIG_RELOCATABLE bl __relocate_kernel #endif diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 17bff6e399e46..4e26f43a2a076 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -88,11 +88,21 @@ static u64 get_kaslr_seed(void *fdt) asmlinkage u64 kaslr_early_init(void *fdt) { u64 seed; - + + /* IAMROOT20 20231028 + * cmdline에서 kaslr이 disable되어 있는지 확인 + */ if (is_kaslr_disabled_cmdline(fdt)) return 0; + /* IAMROOT20 20231028 + * DT에서 kaslr seed 값을 read + */ seed = get_kaslr_seed(fdt); + /* IAMROOT20 20231028 + * DT에 seed 값이 존재하지 않으면, + * RNDR 레지스터를 읽어 seed를 얻음 + */ if (!seed) { if (!__early_cpu_has_rndr() || !__arm64_rndr((unsigned long *)&seed)) @@ -106,5 +116,8 @@ asmlinkage u64 kaslr_early_init(void *fdt) * the lower and upper quarters to avoid colliding with other * allocations. 
*/ + /* IAMROOT20 20231028 + * seed, VA_BITS_MIN을 이용하여 kaslr offset을 return + */ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)); } From 5146673c5c2e7a46bb205d273dc85b3e48bd5105 Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 4 Nov 2023 22:46:43 +0900 Subject: [PATCH 008/104] IAMROOT20 20231104 - Add comments to 'head.S' - __relocate_kernel --- arch/arm64/kernel/head.S | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6ef67d428aee2..8b47270744ef5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -994,6 +994,12 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) mov_q x11, KIMAGE_VADDR // default virtual offset add x11, x11, x23 // actual virtual offset + /* IAMROOT20 20231104 + * relocation table을 참조하여 R_AARCH64_RELATIVE type을 가진 모든 entry들의 + * offset + kaslr displacement 주소의 값을 addend + kaslr displacement으로 설정함 + * relocation table의 offset은 구조체 변수의 필드 위치를 가리키는 것으로 보임 + * relocation table의 addend는 심볼 주소로 보임 + */ 0: cmp x9, x10 b.hs 1f ldp x12, x13, [x9], #24 @@ -1039,6 +1045,16 @@ SYM_FUNC_START_LOCAL(__relocate_kernel) adr_l x9, __relr_start adr_l x10, __relr_end + /* IAMROOT20 20231104 + * relative relocation 과정 + * 배경: 기존의 relocation table에서 R_AARCH64_RELATIVE type이 많은 부분을 차지하고 있음 + * R_AARCH64_RELATIVE의 특징은 offset이 8bytes 크기 차이로 모여있음 + * RELA relocation의 info, type을 제거하고 offset을 압축해서 relr section을 생성 + * relative relocation entry는 address와 bitmap으로 나누어짐. entry 값이 짝수이면 address를, 홀수이면 bitmap을 의미함. 
+ * 짝수 값 entry는 맨 처음 1개의 relocation을 담당하고 (label 2) + * bitmap은 다음부터 8bytes씩 증가되는 63개의 relocation을 진행하며 bitmap LSB 값 1은 무시 (label 3, 4, 5, 6) + * rela relocation에서 진행했던 offset + kaslr_displacement / addend + kaslr_displacement 변경 과정은 동일 + */ 2: cmp x9, x10 b.hs 7f ldr x11, [x9], #8 @@ -1110,10 +1126,19 @@ SYM_FUNC_START_LOCAL(__primary_switch) adrp x1, init_pg_dir load_ttbr1 x1, x1, x2 /* IAMROOT20_END 20231028 */ + /* IAMROOT20_START 20231104 */ #ifdef CONFIG_RELOCATABLE bl __relocate_kernel #endif + /* IAMROOT20 20231104 + * ldr x8, =__primary_switched는 pseudo instruction(literal pool)으로 + * 빌드 타임의 __primary_switched 가상 주소를 해당 명령어 주변에 셋팅함 + * 런 타임에 해당 라인이 실행되면 x8에 __primary_switched의 가상 주소를 저장하고 + * x0에 커널 이미지의 시작 물리 주소를 들고 + * x8 주소로 branch하면 TTBR1_EL1을 base 주소로 하는 init_pg_dir 페이지 테이블을 타고 커널 가상 주소 영역으로 진입 + */ ldr x8, =__primary_switched adrp x0, KERNEL_START // __pa(KERNEL_START) br x8 + /* IAMROOT20_END 20231104 */ SYM_FUNC_END(__primary_switch) From 66f501cb34d4b091176b3b1e33a0422279fd0f7d Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 11 Nov 2023 13:10:42 +0000 Subject: [PATCH 009/104] IAMROOT20 20231111 - Add comments to 'thread_info.h' - thread_info --- arch/arm64/include/asm/thread_info.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de82..e00b0a7423c39 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -21,6 +21,10 @@ struct task_struct; /* * low level task data that entry.S needs immediate access to. */ +/* IAMROOT20 20231111 + * preempt_count: 프로세스의 컨텍스트 실행 정보와 프로세스가 선점 스케줄링 될 정보를 저장. 
+ * - 0이면 선점 가능, 0 미만이면 bug + */ struct thread_info { unsigned long flags; /* low level flags */ #ifdef CONFIG_ARM64_SW_TTBR0_PAN From fc97e00cb20740b20a87873ab908456b8c571d10 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 11 Nov 2023 13:11:27 +0000 Subject: [PATCH 010/104] IAMROOT20 20231111 - Add comments to 'head.S' - __primary_switched, init_cpu_task --- arch/arm64/kernel/head.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8b47270744ef5..23024dfa3f914 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -643,7 +643,15 @@ SYM_FUNC_END(create_kernel_mapping) */ .macro init_cpu_task tsk, tmp1, tmp2 msr sp_el0, \tsk - + + /* IAMROOT20 20231111 + * tmp1 = tsk->stack + * TSK_STACK = 32 + * THREAD_SIZE = 16k + * PT_REGS_SIZE = 336 + * S_STACKFRAME = 304 + */ + /* IAMROOT20_END 20231111 */ ldr \tmp1, [\tsk, #TSK_STACK] add sp, \tmp1, #THREAD_SIZE sub sp, sp, #PT_REGS_SIZE @@ -664,6 +672,7 @@ SYM_FUNC_END(create_kernel_mapping) * * x0 = __pa(KERNEL_START) */ +/* IAMROOT20_START 20231111 */ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x4, init_task init_cpu_task x4, x5, x6 From a750b13882be401d2d2fa2b5aa9ac2dce5b20d96 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 11 Nov 2023 13:12:05 +0000 Subject: [PATCH 011/104] IAMROOT20 20231111 - Add comments to 'init_task.c' - init_task --- init/init_task.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b1..ebbd27a8ec5ab 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -61,16 +61,47 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] * Set up the first task table, touch at your own risk!. 
Base=0, * limit=0x1fffff (=2MB) */ +/* IAMROOT20 20231111 + * ARM64: CONFIG_ARCH_TASK_STRUCT_ON_STACK = false + * IA64: CONFIG_ARCH_TASK_STRUCT_ON_STACK = true + * true의 경우, __init_task_data 매크로는 __section(".data..init_task")을 가리킨다. + * false의 경우, __init_task_data는 아무것도 가리키지 않는다. + */ struct task_struct init_task #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK __init_task_data #endif +/* IAMROOT20 20231111 + * 아래 구조체를 L1_CACHE_BYTES 크기로 정렬한다. + * L1_CACHE_BYTES = 1 << 6 = 64 byte + */ __aligned(L1_CACHE_BYTES) = { +/* IAMROOT20 20231111 + * CONFIG_THREAD_INFO_IN_TASK = true + * INIT_THREAD_INFO : + * 1) 현재 CPU의 FP 처리 활성 상태 + * 2) CPU의 선점 가능 여부 + * 3) shadow call stack 활성 시, shadow call stack에 대한 정보 + * REFCOUNT_INIT : 1 + */ #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), .stack_refcount = REFCOUNT_INIT(1), #endif +/* IAMROOT20 20231111 + * __state: 프로세스의 상태를 저장 + * - 0x00000000: TASK_RUNNING + * init_stack: THREAD_SHIFT로 정렬된 data 섹션의 위치 + * flags: 프로세스의 세부 실행 상태 + * - PF_KTHREAD: 커널 스레드임을 의미 + * MAX_PRIO: 우선순위 최대값 + * CPU_MASK_ALL: cpumask_t의 모든 배열의 요소마다 모든 비트가 1로 설정된 비트 필드를 설정. + * NR_CPUS: arm64에는 256으로 정의되어 있음 + * tasks: 커널에서 구동 중인 모든 프로세스 중 가장 최상위 프로세스의 태스크 + * active_mm: init_mm 구조체 설정 + * - .pgd = init_pg_dir + */ .__state = 0, .stack = init_stack, .usage = REFCOUNT_INIT(2), @@ -102,6 +133,10 @@ struct task_struct init_task #ifdef CONFIG_CGROUP_SCHED .sched_task_group = &root_task_group, #endif +/* IAMROOT20 20231111 + * INIT_TASK_COMM: "swapper" + * children, sibling: 연결리스트로, 현재 자기 자신을 next와 prev에 설정한다. 
+ */ .ptraced = LIST_HEAD_INIT(init_task.ptraced), .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), .real_parent = &init_task, From 769c1b8dfc84d7069e61d8eb78d491d6d5d45c48 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 18 Nov 2023 14:18:28 +0000 Subject: [PATCH 012/104] IAMROOT20 20231118 - Add comments to 'entry.S' - vectors --- arch/arm64/kernel/entry.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ab2a6e33c0528..5d1fd2b1f7bf7 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -505,6 +505,14 @@ tsk .req x28 // current thread_info /* * Exception vectors. + */ +/* IAMROOT20 20231118 + * kernel_ventry el, ht, regsize, label + * el : exception level + * ht : sp0 공유 여부 + * - t(thread) : sp0 공유 + * - h(handler): sp0 공유하지 않음 + * label : exception 라벨 */ .pushsection ".entry.text", "ax" From 54cff240efa50a605aaf73af0829313ad150b4a1 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 18 Nov 2023 14:19:28 +0000 Subject: [PATCH 013/104] IAMROOT20 20231118 - Add comments to 'head.S' - __primary_switched --- arch/arm64/kernel/head.S | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 23024dfa3f914..371b9167a6bbc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -651,7 +651,7 @@ SYM_FUNC_END(create_kernel_mapping) * PT_REGS_SIZE = 336 * S_STACKFRAME = 304 */ - /* IAMROOT20_END 20231111 */ + /* IAMROOT20_END 20231111 */ /* IAMROOT20_START 20231118 */ ldr \tmp1, [\tsk, #TSK_STACK] add sp, \tmp1, #THREAD_SIZE sub sp, sp, #PT_REGS_SIZE @@ -661,6 +661,11 @@ SYM_FUNC_END(create_kernel_mapping) scs_load_current + /* IAMROOT20 20231118 + * tmp1 = __per_cpu_offset[NR_CPUS] + * tmp2 = tsk.thread_info.cpu + * tmp1 = __per_cpu_offset[tmp2] + */ adr_l \tmp1, __per_cpu_offset ldr w\tmp2, [\tsk, #TSK_TI_CPU] ldr \tmp1, [\tmp1, \tmp2, lsl #3] @@ -681,11 +686,19 @@ 
SYM_FUNC_START_LOCAL(__primary_switched) msr vbar_el1, x8 // vector table address isb +/* IAMROOT20 20231118 + * x29 : stack frame + * x30 : link register + */ stp x29, x30, [sp, #-16]! mov x29, sp str_l x21, __fdt_pointer, x5 // Save FDT pointer +/* IAMROOT20 20231118 + * kimage_vaddr : 커널 이미지의 가상 주소의 시작 (0xffff800008000000) + */ +/* IAMROOT20_END 20231118 */ ldr_l x4, kimage_vaddr // Save the offset between sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings From 381e662d0d69d5b647db3b980718bff5fc18d2c1 Mon Sep 17 00:00:00 2001 From: mibchan Date: Sat, 25 Nov 2023 18:15:45 +0900 Subject: [PATCH 014/104] IAMROOT20 20231125 - Add comments to 'head.S' __primary_switched --- arch/arm64/kernel/head.S | 23 +++++++++++++++++++++++ arch/arm64/mm/fixmap.c | 6 +++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 371b9167a6bbc..b585e156a29e1 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -699,6 +699,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) * kimage_vaddr : 커널 이미지의 가상 주소의 시작 (0xffff800008000000) */ /* IAMROOT20_END 20231118 */ +/* IAMROOT20_START 20231125 */ ldr_l x4, kimage_vaddr // Save the offset between sub x4, x4, x0 // the kernel virtual and str_l x4, kimage_voffset, x5 // physical mappings @@ -707,13 +708,24 @@ SYM_FUNC_START_LOCAL(__primary_switched) bl set_cpu_boot_mode_flag // Clear BSS + /* IAMROOT20 20231125 + * bss 영역을 0으로 초기화 (init_pg_dir 아래에 위치) + */ adr_l x0, __bss_start mov x1, xzr adr_l x2, __bss_stop sub x2, x2, x0 bl __pi_memset + /* IAMROOT20 20231125 + * ISHST : 스토어가 마무리되기를 기다리는 동작 (Store - Store) + * domain : Inner Sharable domain + */ dsb ishst // Make zero page visible to PTW +/* IAMROOT20 20231125 + * 가상주소 52bit를 지원하면 vabits_actual = 52 + * 지원하지 않는다면 vabits_actual = 48 + */ #if VA_BITS > 48 adr_l x8, vabits_actual // Set this early so KASAN early init str x25, [x8] // ... 
observes the correct value @@ -913,8 +925,19 @@ SYM_FUNC_END(__secondary_too_slow) * in w0. See arch/arm64/include/asm/virt.h for more info. */ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) + /* IAMROOT20 20231125 + * EL2일 경우 + * u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL2 }; + * x1 x1 + * EL1일 경우 + * u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL1, BOOT_CPU_MODE_EL1 }; + * x1 + */ adr_l x1, __boot_cpu_mode cmp w0, #BOOT_CPU_MODE_EL2 + /*IAMROOT20 20231125 + * EL1인 경우 label 1로 이동 + */ b.ne 1f add x1, x1, #4 1: str w0, [x1] // Save CPU boot mode diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index c0a3301203bdf..66445c572e9ec 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -99,7 +99,11 @@ void __init early_fixmap_init(void) { unsigned long addr = FIXADDR_TOT_START; unsigned long end = FIXADDR_TOP; - + + /* IAMROOT20 20231125 + * *pgdp = swapper_pgdir + pgd + * *p4dp = *pgdp; + */ pgd_t *pgdp = pgd_offset_k(addr); p4d_t *p4dp = p4d_offset(pgdp, addr); From 78bcb4ecfefb3ff9c0e6ef213209ce94fbc00aa0 Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 2 Dec 2023 12:11:09 +0900 Subject: [PATCH 015/104] IAMROOT20 Add comments for early_fixmap_init --- arch/arm64/include/asm/fixmap.h | 15 +++++++++++++++ arch/arm64/include/asm/memory.h | 21 +++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 21 +++++++++++++++++++++ include/linux/mm_types.h | 4 ++++ include/linux/pgtable.h | 5 +++++ 5 files changed, 66 insertions(+) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 58c294a966768..62f77b82e957f 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -67,6 +67,10 @@ enum fixed_addresses { FIX_ENTRY_TRAMP_TEXT1, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ + /* + * IAMROOT20 20231130: + * __end_of_permanent_fixed_addresses 523 + */ __end_of_permanent_fixed_addresses, /* @@ -89,9 +93,20 @@ enum 
fixed_addresses { FIX_PUD, FIX_PGD, + /* + * IAMROOT20 20231130: + * __end_of_fixed_addresses 975 + */ __end_of_fixed_addresses }; +/* + * IAMROOT20 20231130: + * FIXADDR_SIZE 0x0020_b000 SZ_523_PAGE + * FIXADDR_START 0xffff_fbff_fddf_5000 + * FIXADDR_TOT_SIZE 0x003c_f000 SZ_975_PAGE + * FIXADDR_TOT_START 0xffff_fbff_fdc3_1000 + */ #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b1..7dad95c1b99a9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -30,6 +30,12 @@ * keep a constant PAGE_OFFSET and "fallback" to using the higher end * of the VMEMMAP where 52-bit support is not available in hardware. */ + /* + * IAMROOT20 20231129: + * exam) VA_BITS 48, VA_BITS_MIN 48 + * VMEMMAP_SHIFT 6 + * VMEMMAP_SIZE 0x0200_0000_0000 SZ_2T + */ #define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT) @@ -40,6 +46,21 @@ * KIMAGE_VADDR - the virtual address of the start of the kernel image. * VA_BITS - the maximum number of bits for virtual addresses. 
*/ + /* + * IAMROOT20 20231129: + * VA_BITS 48 + * VA_BITS_MIN 48 + * PAGE_OFFSET 0xffff_0000_0000_0000 + * KIMAGE_VADDR 0xffff_8000_0800_0000 + * MODULES_END 0xffff_8000_0800_0000 + * MODULES_VADDR 0xffff_8000_0000_0000 + * MODULES_VSIZE 0x0800_0000 SZ_128M + * VMEMMAP_START 0xffff_fc00_0000_0000 + * VMEMMAP_END 0xffff_fe00_0000_0000 + * PCI_IO_END 0xffff_fbff_ff80_0000 + * PCI_IO_START 0xffff_fbff_fe80_0000 + * FIXADDR_TOP 0xffff_fbff_fe00_0000 + */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define _PAGE_OFFSET(va) (-(UL(1) << (va))) #define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 4d1a475b0bfa2..f006bb6c313ba 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -51,6 +51,11 @@ * exam) 39 VA_BITS, 4k * PMD_SHIFT 21 * PMD_SIZE SZ_2M + * exam) 48 VA_BITS, 4k + * PMD_SHIFT 21 + * PMD_SIZE SZ_2M + * PMD_MASK 0xffff_ffff_ffe0_0000 + * PTRS_PER_PMD 512 */ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) @@ -62,6 +67,14 @@ * PUD_SHIFT determines the size a level 1 page table entry can map. 
*/ #if CONFIG_PGTABLE_LEVELS > 3 + /* + * IAMROOT20 20231201: + * exam) 48 VA_BITS, 4k + * PUD_SHIFT 30 + * PUD_SIZE SZ_1G + * PUD_MASK 0xffff_ffff_c000_0000 + * PTRS_PER_PUD 512 + */ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) @@ -79,6 +92,14 @@ * PGDIR_SIZE : 1 << 25 SZ_32M * PGDIR_MASK : 0xffff_ffff_fe00_0000 * PTRS_PER_PGD : 11 (36 - 25) + * exam) VA_BITS == 52 on 64k(16bit) CONFIG_PGTABLE_LEVELS = 3 + PGDIR_SHIFT : 42 + PGDIR_SIZE : SZ_4T + * exam) VA_BITS == 48 on 4k CONFIG_PGTABLE_LEVELS = 4 + * PGDIR_SHIFT 39 + * PGDIR_SIZE SZ_512G + * PGDIR_MASK 0xffff_ff80_0000_0000 + * PTRS_PER_PGD 512 */ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 306a3d1a0fa65..5aa82abe6eff1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -396,6 +396,10 @@ FOLIO_MATCH(compound_head, _head_2); /* * Used for sizing the vmemmap region on some architectures */ + /* + * IAMROOT20 20231129: + * STRUCT_PAGE_MAX_SHIFT 6 + */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index c5a51481bbb90..ac560eb0c9d49 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -83,6 +83,11 @@ static inline unsigned long pud_index(unsigned long address) #ifndef pgd_index /* Must be a compile-time constant, so implement it as a macro */ +/* + * IAMROOT20 20231130: + * exam) VA_BITS 48, PAGE_SIZE 4k + * pgd_index(a) a >> 39 & 511 + */ #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif From 288917dc8714d8f8bf895138f86d7e70ba89a630 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 2 Dec 2023 12:47:41 +0900 Subject: [PATCH 016/104] IAMROOT20 20231125 - Add comments ---
arch/arm64/mm/fixmap.c | 3 ++- mm/init-mm.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index 66445c572e9ec..62953180cc5f9 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -86,6 +86,7 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = pud_offset_kimg(p4dp, addr); + /* IAMROOT20_END 20231125 */ early_fixmap_init_pmd(pudp, addr, end); } @@ -101,7 +102,7 @@ void __init early_fixmap_init(void) unsigned long end = FIXADDR_TOP; /* IAMROOT20 20231125 - * *pgdp = swapper_pgdir + pgd + * *pgdp = init_pg_dir + pgd * *p4dp = *pgdp; */ pgd_t *pgdp = pgd_offset_k(addr); diff --git a/mm/init-mm.c b/mm/init-mm.c index efa97b57acfd8..93a5d1c93cb03 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -27,6 +27,9 @@ * Since there is only one init_mm in the entire system, keep it simple * and size this cpu_bitmask to NR_CPUS. */ +/* IAMROOT20 20231125 + * arm64에서는 .pgd에 swapper_pg_dir대신 init_pg_dir로 설정한다. 
+ */ struct mm_struct init_mm = { .mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock), .pgd = swapper_pg_dir, From 196cb94afeddc07da171d43cc1eeaaa6a8c45003 Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 2 Dec 2023 22:32:10 +0900 Subject: [PATCH 017/104] IAMROOT20 20231202 Add comments for early_fixmap_init --- arch/arm64/include/asm/memory.h | 16 +++++++- arch/arm64/include/asm/page-def.h | 6 +++ arch/arm64/include/asm/pgalloc.h | 6 +++ arch/arm64/include/asm/pgtable-hwdef.h | 9 +++++ arch/arm64/include/asm/pgtable.h | 51 ++++++++++++++++++++++++++ arch/arm64/mm/fixmap.c | 3 +- 6 files changed, 88 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 7dad95c1b99a9..9d0e7df5a01c3 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -33,8 +33,8 @@ /* * IAMROOT20 20231129: * exam) VA_BITS 48, VA_BITS_MIN 48 - * VMEMMAP_SHIFT 6 - * VMEMMAP_SIZE 0x0200_0000_0000 SZ_2T + * VMEMMAP_SHIFT 6 + * VMEMMAP_SIZE 0x0200_0000_0000 SZ_2T */ #define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT) @@ -98,6 +98,10 @@ #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 +/* + * IAMROOT20 20231202: + * PAGE_END 0xffff_8000_0000_0000 + */ #define PAGE_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ @@ -306,6 +310,10 @@ static inline const void *__tag_set(const void *addr, u8 tag) * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the * kernel's TTBR1 address range. */ + /* + * IAMROOT20 20231202: + * __is_lm_address(addr) => PAGE_OFFSET <= addr < PAGE_END + */ #define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) #define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET) @@ -356,6 +364,10 @@ static inline void *phys_to_virt(phys_addr_t x) * Drivers should NOT use these either. 
*/ #define __pa(x) __virt_to_phys((unsigned long)(x)) + /* + * IAMROOT20 20231202: + * __pa_symbol(x) -> (x - kimage_voffset) + */ #define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) #define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbfb..1be672f578563 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,6 +11,12 @@ #include /* PAGE_SHIFT determines the page size */ +/* IAMROOT20 20231202 + * exam) VA_BITS 48, PAGE_SHIFT 12 + * PAGE_SHIFT 12 + * PAGE_SIZE SZ_4K + * PAGE_MASK 0xffff_ffff_ffff_f000 + */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f6..ecdd1ebb124be 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -22,6 +22,9 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { + /* IAMROOT20 20231202 + * *pudp = (pmdp | prot) + */ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); } @@ -41,6 +44,9 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) #if CONFIG_PGTABLE_LEVELS > 3 +/** IAMROOT20 20231202 + * *p4dp = (pudp | prot); + */ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f006bb6c313ba..e107abc29ee06 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -195,6 +195,9 @@ * CONFIG_ARM64_PA_BITS_52 일경우 * PAGE_SHIFT 16 * PTE_ADDR_LOW 0x0000_ffff_ffff_0000 ((1 << (48 - 16)) - 1) << 16 + * CONFIG_ARM64_PA_BITS_48 일경우 + *
PAGE_SHIFT 12 + * PTE_ADDR_LOW 0x0000_ffff_ffff_f000 ((1 << (48 - 12)) - 1) << 12 */ #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -207,6 +210,12 @@ #define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) #define PTE_ADDR_HIGH_SHIFT 36 #else +/* + * IAMROOT20 20231202: + * CONFIG_ARM64_PA_BITS_48 일경우 + * PAGE_SHIFT 12 + * PTE_ADDR_MASK 0x0000_ffff_ffff_f000 + */ #define PTE_ADDR_MASK PTE_ADDR_LOW #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0bd18de9fd97b..bbee8782b3796 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -76,11 +76,23 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_ARM64_PA_BITS_52 static inline phys_addr_t __pte_to_phys(pte_t pte) { + /* IAMROOT20 20231202 + * pte & 0x0000_ffff_ffff_0000 | (pte & 0xf000) << 36 + * exam) + * pte = 0x0000_ABCD_EF01_9000 + * return 0x0009_ABCD_EF01_0000 + */ return (pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT); } static inline pteval_t __phys_to_pte_val(phys_addr_t phys) { + /* IAMROOT20 20231202 + * (phys | (phys >> 36)) & 0x0000_ffff_ffff_f000 + * exam) + * phys = 0x0009_ABCD_EF01_0000 + * return 0x0000_ABCD_EF01_9000 + */ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK; } #else @@ -124,6 +136,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) +/* IAMROOT20 20231202 + * return pte & PTE_VALID; + */ #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) /* * Execute-only user mappings do not have the PTE_USER bit set. 
All valid @@ -381,6 +396,10 @@ static inline pte_t pgd_pte(pgd_t pgd) return __pte(pgd_val(pgd)); } +/* + * IAMROOT20 20231202: + * return p4d + */ static inline pte_t p4d_pte(p4d_t p4d) { return __pte(p4d_val(p4d)); @@ -602,8 +621,14 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } #else +/* IAMROOT20 20231202 + * pud_sect(pud) => (pud & 3) == 1 + */ #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) +/* IAMROOT20 20231202 + * pud_table(pud) => (pud & 3) == 3 + */ #define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_TABLE) #endif @@ -678,6 +703,15 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) +/* IAMROOT20 20231202 + * pud_none(pud) => (pud == 0) + * pud_bad(pud) => ((pud & 3) != 3) + * pud_present(pud) => (pud & (PTE_VALID | PTE_PROT_NONE)) + * pud_leaf(pud) => (pud_present(pud) && !pud_table(pud)) + * pud_valid(pud) => (pud & 1) + * pud_user(pud) => (pud & PTE_USER) + * pud_user_exec(pud) => !(pud & PTE_UXN) + */ #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!pud_table(pud)) #define pud_present(pud) pte_present(pud_pte(pud)) @@ -695,6 +729,9 @@ static inline void set_pud(pud_t *pudp, pud_t pud) } #endif /* __PAGETABLE_PUD_FOLDED */ + /* IAMROOT20 20231202 + * *pudp = pud + */ WRITE_ONCE(*pudp, pud); if (pud_valid(pud)) { @@ -749,6 +786,12 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define pud_ERROR(e) \ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) +/* + * IAMROOT20 20231202: + * p4d_none p4d == 0 + * p4d_bad !(p4d & 2) + * p4d_present p4d != 0 + */ #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) #define p4d_present(p4d) (p4d_val(p4d)) @@ -760,6 +803,11 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) return;
} + /* IAMROOT20 20231202 + * *p4dp = p4d; + * dsb ishst + * isb + */ WRITE_ONCE(*p4dp, p4d); dsb(ishst); isb(); @@ -781,6 +829,9 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) } /* Find an entry in the first-level page table. */ +/* IAMROOT20 20231202 + * p4d_page_paddr(*dir) + pud_index(addr) * sizeof(pud_t) + */ #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index 62953180cc5f9..13ca17164069c 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -57,6 +57,7 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, if (pud_none(pud)) __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + /* IAMROOT20_END 20231202 */ pmdp = pmd_offset_kimg(pudp, addr); do { @@ -86,7 +87,7 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = pud_offset_kimg(p4dp, addr); - /* IAMROOT20_END 20231125 */ + /* IAMROOT20_END 20231125 */ /* IAMROOT20_START 20231202j */ early_fixmap_init_pmd(pudp, addr, end); } From 026b5a9d08d955153175e8c058e67220d883e7fb Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 9 Dec 2023 22:09:14 +0900 Subject: [PATCH 018/104] IAMROOT20 20231209 - Add comments --- arch/arm64/include/asm/kernel-pgtable.h | 3 +++ arch/arm64/include/asm/pgalloc.h | 5 +++++ arch/arm64/include/asm/pgtable-hwdef.h | 8 ++++++++ arch/arm64/include/asm/pgtable.h | 7 ++++++- arch/arm64/kernel/setup.c | 5 ++++- arch/arm64/mm/fixmap.c | 14 +++++++++++--- arch/arm64/mm/mmu.c | 19 ++++++++++++++++++- include/asm-generic/fixmap.h | 4 ++++ 8 files changed, 59 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index fd258fa4c6288..d03bbceaf5c22 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ 
b/arch/arm64/include/asm/kernel-pgtable.h @@ -63,6 +63,9 @@ #define EARLY_KASLR (0) #endif +/* IAMROOT20 20231209 + * vstart ~ vend 에서 shift 크기가 몇 개 들어갈 수 있는지를 구함 + */ #define SPAN_NR_ENTRIES(vstart, vend, shift) \ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ecdd1ebb124be..6a7fbcc5de960 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -72,6 +72,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { + /* IAMROOT20 20231209 + * __pmd(__phys_to_pmd_val(ptep) | prot = ptep | prot + * + * *pmdp = ptep | prot + */ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot)); } diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e107abc29ee06..e81497ae961f5 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -114,6 +114,14 @@ #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +/* IAMROOT20 20231209 + * CONFIG_ARM64_CONT_PMD_SHIFT = 4(arm64 defconfig) + * exam) 4K, 4-level + * CONT_PMD_SHIFT = 4 + 21 + * CONT_PMDS = (1 << 4) = 16 + * CONT_PMD_SIZE = 16 * SZ_2M = 32MB + * CONT_PMD_MASK = ~(32M - 1) + */ #define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bbee8782b3796..22a5870016d61 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -101,6 +101,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #endif #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT) +/* IAMROOT20 20231209 + * pfn_pte(pfn,prot) = (pfn << PAGE_SHIFT) | prot + */ #define pfn_pte(pfn,prot) \ 
__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) @@ -656,7 +659,9 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) return; } #endif /* __PAGETABLE_PMD_FOLDED */ - + /* IAMROOT20 20231209 + * *pmdp = pmd + */ WRITE_ONCE(*pmdp, pmd); if (pmd_valid(pmd)) { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cbe..1c40cbe733dbc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -176,7 +176,10 @@ void __init *get_early_fdt_ptr(void) asmlinkage void __init early_fdt_map(u64 dt_phys) { int fdt_size; - + + /* IAMROOT20 20231209 + * fixmap 영역을 init_pg_dir, bm_pud, bm_pmd, bm_pte를 이용하여 매핑한다. + */ early_fixmap_init(); early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL); } diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index 13ca17164069c..a330bd4dca476 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -25,9 +25,14 @@ static_assert(NR_BM_PMD_TABLES == 1); #define __BM_TABLE_IDX(addr, shift) \ (((addr) >> (shift)) - (FIXADDR_TOT_START >> (shift))) - +/* IAMROOT20 20231209 + * BM_PTE 테이블에서 addr이 가리키는 index를 찾는다 + */ #define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT) - +/* IAMROOT20 20231209 + * exam) 4KB / 4 level + * -> bm_pte[2][512] + */ static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss; static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; @@ -57,7 +62,7 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, if (pud_none(pud)) __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); - /* IAMROOT20_END 20231202 */ + /* IAMROOT20_END 20231202 */ /* IAMROOT20_START 20231209 */ pmdp = pmd_offset_kimg(pudp, addr); do { @@ -148,6 +153,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) * fields of the FDT header after mapping the first chunk, double check * here if that is indeed 
the case. */ + /* IAMROOT20 20231209 + * MIN_FDT_ALIGN은 최소 8이어야 한다 + */ BUILD_BUG_ON(MIN_FDT_ALIGN < 8); if (!dt_phys || dt_phys % MIN_FDT_ALIGN) return NULL; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index af6bc8403ee46..4816b42102d42 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -300,9 +300,16 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, next = pmd_cont_addr_end(addr, end); /* use a contiguous mapping if the range is suitably aligned */ + /* IAMROOT20 20231209 + * CONT_PMD_MASK = ~(32MB - 1) + * + * addr, next, phys가 모두 32MB 로 정렬되어 있는지 확인 + * NO_CONT_MAPPINGS 플래그가 set되어 있지 않는지 확인 + */ if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + /* IAMROOT20_END 20231209 */ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); @@ -332,7 +339,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); - + + /* IAMROOT20 20231209 + * FIX_PUD를 bm_pud에 매핑한다 + * pudp = virt(FIX_PUD) + (phys_addr & (PAGE_SIZE - 1)) + */ pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -382,8 +393,14 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) return; + /* IAMROOT20 20231209 + * virt, phys 주소를 page size만큼 round down + */ phys &= PAGE_MASK; addr = virt & PAGE_MASK; + /* IAMROOT20 20231209 + * virt + size 주소를 page size 단위로 round up + */ end = PAGE_ALIGN(virt + size); do { diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc71..89e638435e430 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -71,6 +71,10 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #endif /* Return a pointer with offset calculated */ +/* IAMROOT20 20231209 + * idx에 해당하는 fixmap(bm_pte[][])에 (phys | flags)를 write하고 + * idx에 
해당하는 (virtual 주소 + offset)를 return + */ #define __set_fixmap_offset(idx, phys, flags) \ ({ \ unsigned long ________addr; \ From 9670d3784da445af2dd23b32ddcc2f403db56a01 Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 16 Dec 2023 22:11:46 +0900 Subject: [PATCH 019/104] IAMROOT20 20231216 Add comments for fixmap_remap_fdt, init_feature_override --- arch/arm64/include/asm/pgtable-hwdef.h | 7 +++++++ arch/arm64/kernel/idreg-override.c | 28 ++++++++++++++++++++++++++ arch/arm64/mm/fixmap.c | 18 ++++++++++++++++- arch/arm64/mm/mmu.c | 19 ++++++++++++++++- include/asm-generic/fixmap.h | 8 ++++++++ include/linux/bits.h | 5 +++++ include/linux/pgtable.h | 5 +++++ scripts/dtc/libfdt/libfdt.h | 7 +++++++ 8 files changed, 95 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e81497ae961f5..fc541758192ab 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -109,6 +109,13 @@ /* * Contiguous page definitions. */ +/* IAMROOT20 20231216 + * exam) VA_BITS == 48, 4k + * CONT_PTE_SHIFT 16 + * CONT_PTES 16 + * CONT_PTE_SIZE SZ_64K + * CONT_PTE_MASK 0xffff_ffff_ffff_0000 + */ #define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 370ab84fd06e2..b1f1780f9956a 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -185,12 +185,24 @@ static int __init parse_nokaslr(char *unused) } early_param("nokaslr", parse_nokaslr); +/* IAMROOT20 20231216 + * return 값이 0이면 성공적으로 찾음. 그외의 값이면 실패. 
+ */ static int __init find_field(const char *cmdline, const struct ftr_set_desc *reg, int f, u64 *v) { char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; int len; + /* IAMROOT20 20231216 + * exam) cmdline = "kaslr.disabled=1" + * reg->name = "kaslr" + * reg->fields[0].name = "disabled" + * ==> opt = "kaslr.disabled=" + * ==> len = 15 + * + * ==> kstrtou64("1", 0, v); + */ len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=", reg->name, reg->fields[f].name); @@ -231,9 +243,18 @@ static void __init match_options(const char *cmdline) continue; } + /* IAMROOT20 20231216 + * exam) kaslr.override->val = 0, kaslr.override->mask = 0 + * v = 1 + * mask = 0x0f + * regs[i]->override->val = 0 + * regs[i]->override->val |= (1 << 0) & 0x0f + * regs[i]->override->mask |= 0x0f + */ regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; + /* IAMROOT20_END 20231216 */ return; } @@ -293,6 +314,13 @@ static __init const u8 *get_bootargs_cmdline(void) static __init void parse_cmdline(void) { + /* IAMROOT20 20231216 + * \ { + * chosen { + * bootargs = "console=ttyS1,115200 earlyprintk"; + * } + * }; + */ const u8 *prop = get_bootargs_cmdline(); if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop) diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index a330bd4dca476..1eb715337667e 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -23,6 +23,10 @@ static_assert(NR_BM_PMD_TABLES == 1); +/* IAMROOT20 20231216 + * FIX_PMD vaddr : 0xfffffbfffdc34000, shift : 21 + * (0xfffffbfffdc34000 >> 21) - (0xffff_fbff_fdc3_1000 >> 21) = 0 + */ #define __BM_TABLE_IDX(addr, shift) \ (((addr) >> (shift)) - (FIXADDR_TOT_START >> (shift))) /* IAMROOT20 20231209 @@ -39,6 +43,10 @@ static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static inline pte_t *fixmap_pte(unsigned long addr) { + /* IAMROOT20 20231216 + * idx = FIX_PMD 일 경우 + * addr = 0xfffffbfffdc34000 &bm_pte[0][52] 를 리턴 + */ return 
&bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)]; } @@ -92,7 +100,7 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = pud_offset_kimg(p4dp, addr); - /* IAMROOT20_END 20231125 */ /* IAMROOT20_START 20231202j */ + /* IAMROOT20_END 20231125 */ /* IAMROOT20_START 20231202 */ early_fixmap_init_pmd(pudp, addr, end); } @@ -129,9 +137,17 @@ void __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + /* IAMROOT20 20231216 + * idx = FIX_PMD 일 경우 + * addr = 0xfffffbfffdc34000 이며 + * bm_pte[0][52]의 주소를 가져옴. + */ ptep = fixmap_pte(addr); if (pgprot_val(flags)) { + /* IAMROOT20 20231216 + * FIX_P*D의 가상주소(ptep)에 bm_p*d(phys)를 매핑한다. + */ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); } else { pte_clear(&init_mm, addr, ptep); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4816b42102d42..43d0a2638b11a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -219,6 +219,16 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, do { pgprot_t __prot = prot; + /* IAMROOT20 20231216 + * granule size | cont PTE | cont PMD | + * -------------+------------+------------+ + * 4 KB | 64 KB | 32 MB | + * 16 KB | 2 MB | 1 GB* | + * 64 KB | 2 MB | 16 GB* | + * + * 간략히 설명하자면 아래와 같다. 
+ * next = min(addr + (cont PTE), end); + */ next = pte_cont_addr_end(addr, end); /* use a contiguous mapping if the range is suitably aligned */ @@ -239,10 +249,17 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, unsigned long next; pmd_t *pmdp; + /* IAMROOT20 20231216 + * FIX_PMD 가상주소를 매핑 + */ pmdp = pmd_set_fixmap_offset(pudp, addr); do { pmd_t old_pmd = READ_ONCE(*pmdp); + /* IAMROOT20 20231216 + * exam) addr: 0xfffffbfffddfe000 end: 0xfffffbfffde11000 + * next: 0xfffffbfffde00000 + */ next = pmd_addr_end(addr, end); /* try section mapping first */ @@ -309,7 +326,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - /* IAMROOT20_END 20231209 */ + /* IAMROOT20_END 20231209 */ /* IAMROOT20_START 20231216 */ init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 89e638435e430..42cc6bf037af3 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -18,6 +18,14 @@ #include #include +/* IAMROOT20 20231216 + * FIX_PTE 971 + * 0xfffffbfffdc35000 = 0xffff_fbff_fe00_0000 - (971 << PAGE_SHIFT) + * FIX_PMD 972 + * 0xfffffbfffdc34000 = 0xffff_fbff_fe00_0000 - (972 << PAGE_SHIFT) + * FIX_PUD 973 + * 0xfffffbfffdc33000 = 0xffff_fbff_fe00_0000 - (973 << PAGE_SHIFT) + */ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe8..23b766f1ac117 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -39,6 +39,11 @@ #define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +/* IAMROOT20 20231216 + * 64bit 값중 lbit부터 h비트값이 1인 마스크값을 만든다.
+ * exam) GENMASK( 7, 4) => 0x0000_0000_00f0 + * GENMASK(11, 8) => 0x0000_0000_0f00 + */ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ac560eb0c9d49..ce8d6ced054d4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -851,6 +851,11 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #endif #ifndef pmd_addr_end +/* IAMROOT20 20231216 + * exam) addr: 0xffff_fbff_fddf_e000 end: 0xffff_fbff_fde1_1000 + * (0xfffffbfffddfe000 + SZ_2M ) & 0xffff_ffff_ffe0_0000 + * = 0xffff_fbff_fde0_0000 + */ #define pmd_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ (__boundary - 1 < (end) - 1)? __boundary: (end); \ diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h index 77ccff19911ef..44da80cf01999 100644 --- a/scripts/dtc/libfdt/libfdt.h +++ b/scripts/dtc/libfdt/libfdt.h @@ -142,6 +142,9 @@ static inline uint32_t fdt32_ld(const fdt32_t *p) { const uint8_t *bp = (const uint8_t *)p; + /* IAMROOT20 20231216 + * 0xedfe0dd0 -> 0xd00dfeed + */ return ((uint32_t)bp[0] << 24) | ((uint32_t)bp[1] << 16) | ((uint32_t)bp[2] << 8) @@ -246,6 +249,10 @@ int fdt_next_subnode(const void *fdt, int offset); /**********************************************************************/ #define fdt_get_header(fdt, field) \ (fdt32_ld(&((const struct fdt_header *)(fdt))->field)) +/* IAMROOT20 20231216 + * fdt_magic -> fdt32_ld(&fdt->magic) + * fdt_totalsize -> fdt32_ld(&fdt->totalsize) + */ #define fdt_magic(fdt) (fdt_get_header(fdt, magic)) #define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize)) #define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct)) From 4b8a60fc75a3cb5d62be668d1533a5e22aa628ec Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 23 Dec 2023 12:59:00 +0000 Subject: [PATCH 020/104] IAMROOT20 20231223 - Add comments to 'el2_setup.h' --- arch/arm64/include/asm/el2_setup.h | 6 ++++++ 1 
file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c8..5734f70a9aeca 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -234,6 +234,12 @@ .endm #endif +/* IAMROOT20 20231223 + * override된 필드를 확인하여, 각 필드와 연결된 레지스터의 값을 업데이트 한다. + * SVE, SME 관련 작업들이 주로 업데이트 됨. + * - SVE, SME와 관련된 작업을 할 경우, Trap 시키지 않음. + * - 최대 크기의 벡터를 설정함. + */ .macro finalise_el2_state check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 From 2bbad08407b85e8b2e2d34c228f66b9380641bf2 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 23 Dec 2023 13:00:19 +0000 Subject: [PATCH 021/104] IAMROOT20 20231223 - Add comments to 'hyp-stub.S' --- arch/arm64/kernel/hyp-stub.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9439240c3fcf3..04c7deea1780a 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -73,6 +73,7 @@ SYM_CODE_START_LOCAL(elx_sync) eret SYM_CODE_END(elx_sync) +/* IAMROOT20_END 20231223 */ SYM_CODE_START_LOCAL(__finalise_el2) finalise_el2_state From 30a9c8cae578f48172eae356182afa085aa95b59 Mon Sep 17 00:00:00 2001 From: fehead Date: Mon, 25 Dec 2023 11:48:18 +0900 Subject: [PATCH 022/104] IAMROOT20 20231224 Add comments for init_feature_override --- arch/arm64/include/asm/el2_setup.h | 16 +++++++++++++++ arch/arm64/kernel/idreg-override.c | 32 +++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c8..442e9e41a00bd 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -197,6 +197,22 @@ .endm #ifndef __KVM_NVHE_HYPERVISOR__ +/* IAMROOT20 20231223 + * bool check_override() + * { + * u64 val = IdReg[fld:fld+width]; + * if(val == 0) + * return false; + * + * val = IdReg_override.val; + * 
u64 mask = IdReg_override.mask; + * if(mask == 0) + * return true; + * if(val & mask) + * return true; + * return false; + * } + */ // This will clobber tmp1 and tmp2, and expect tmp1 to contain // the id register value as read from the HW .macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2 diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index b1f1780f9956a..0321bc07793ad 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -209,25 +209,47 @@ static int __init find_field(const char *cmdline, if (!parameqn(cmdline, opt, len)) return -1; + /* IAMROOT20 20231223 + * exam) cmdline = "kaslr.disabled=1" + * cmdline + len = "1" + * return : 0, v = 1 + */ return kstrtou64(cmdline + len, 0, v); } +/* IAMROOT20_START 20231223 + * exam) cmdline="kaslr.disabled=1" + */ static void __init match_options(const char *cmdline) { int i; for (i = 0; i < ARRAY_SIZE(regs); i++) { int f; - + /* IAMROOT20 20231223 + * exam) kaslr -> regs[6] + */ if (!regs[i]->override) continue; for (f = 0; strlen(regs[i]->fields[f].name); f++) { + /* IAMROOT20 20231223 + * exam) kaslr.fileds[0] = { "disabled", 0, 4, NULL} + * shift = 0; + * width = 4; + * mask = GENMASK_ULL(3, 0) -> 0b1111 -> 0xf + */ u64 shift = regs[i]->fields[f].shift; u64 width = regs[i]->fields[f].width ?: 4; u64 mask = GENMASK_ULL(shift + width - 1, shift); u64 v; + /* IAMROOT20 20231223 + * exam) cmdline = "kaslr.disabled=1" + * regs = &kaslr, f = 0 + * + * return : 0, v = 1 + */ if (find_field(cmdline, regs[i], f, &v)) continue; @@ -236,6 +258,10 @@ static void __init match_options(const char *cmdline) * it by setting the value to the all-ones while * clearing the mask... Yes, this is fragile. */ + /* IAMROOT20 20231223 + * filter에서 실패가 나오면 mask값은 0으로 설정하고 + * val값은 모두 1로 셋팅하여 invalid value임을 표시한다. 
+ */ if (regs[i]->fields[f].filter && !regs[i]->fields[f].filter(v)) { regs[i]->override->val |= mask; @@ -285,6 +311,10 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) match_options(buf); + /* IAMROOT20 20231223 + * exam) buf = "nokaslr" 일경우 + * aliases[i].feature = "kaslr.disabled=1" + */ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++) if (parameq(buf, aliases[i].alias)) __parse_cmdline(aliases[i].feature, false); From 2752e83f5f2435415f2ed716c4bc81ffbc46503e Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 6 Jan 2024 09:08:17 +0000 Subject: [PATCH 023/104] IAMROOT20 20240106 - Add comments to 'hyp-stub.S' --- arch/arm64/kernel/hyp-stub.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 04c7deea1780a..512117b8eb1d8 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -46,6 +46,9 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 SYM_CODE_START_LOCAL(elx_sync) + /* IAMROOT20 20240106 + * x0 = #HVC_FINALISE_EL2 + */ cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ -140,6 +143,9 @@ SYM_CODE_END(__finalise_el2) .pushsection .idmap.text, "ax" SYM_CODE_START_LOCAL(enter_vhe) + /* IAMROOT20 20230106 + * TLB invalidate by VMID(Virtual Machine ID ), All at stage 1, EL1. 
+ */ // Invalidate TLBs before enabling the MMU tlbi vmalle1 dsb nsh @@ -216,6 +222,7 @@ SYM_FUNC_END(__hyp_reset_vectors) * * w0: boot mode, as returned by init_kernel_el() */ +/* IAMROOT20_START 20240106 */ SYM_FUNC_START(finalise_el2) // Need to have booted at EL2 cmp w0, #BOOT_CPU_MODE_EL2 From 1e42d6ac0b8151a362989500ba642db0a7f0a2ac Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 6 Jan 2024 21:39:57 +0900 Subject: [PATCH 024/104] IAMROOT20 20240106 Add comments for smp_setup_processor_id --- arch/arm64/include/asm/memory.h | 12 ++++++++++++ arch/arm64/include/asm/sysreg.h | 5 +++++ arch/arm64/kernel/head.S | 3 ++- arch/arm64/kernel/setup.c | 5 +++++ include/linux/compiler_attributes.h | 4 ++++ include/linux/sched/task_stack.h | 3 +++ init/main.c | 9 +++++++++ kernel/fork.c | 3 +++ 8 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 9d0e7df5a01c3..f83776d941dee 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -105,6 +105,9 @@ #define PAGE_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ +/* IAMROOT20 20240106 + * MIN_THREAD_SHIFT 14 or 15(CONFIG_KASAN_GENERIC) + */ #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* @@ -114,6 +117,10 @@ #if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) #define THREAD_SHIFT PAGE_SHIFT #else +/* IAMROOT20 20240106 + * exam) PAGE_SHIFT 12 + * THREAD_SHIFT 14 or 15 + */ #define THREAD_SHIFT MIN_THREAD_SHIFT #endif @@ -121,6 +128,11 @@ #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) #endif +/* IAMROOT20 20240106 + * + * exam) PAGE_SHIFT 12 일경우 + * THREAD_SIZE 16k or 32k + */ #define THREAD_SIZE (UL(1) << THREAD_SHIFT) /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a0bdde444313e..e4f9a34c4ef5a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -864,6 +864,11 @@ * For registers without architectural names, or 
simply unsupported by * GAS. */ +/* IAMROOT20 20240106 + * r = SYS_MPIDR_EL1 + * mrs_s __val, SYS_MPIDR_EL1 + * return __val; + */ #define read_sysreg_s(r) ({ \ u64 __val; \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b585e156a29e1..7620b5a345383 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -662,9 +662,10 @@ SYM_FUNC_END(create_kernel_mapping) scs_load_current /* IAMROOT20 20231118 - * tmp1 = __per_cpu_offset[NR_CPUS] + * tmp1 = __per_cpu_offset * tmp2 = tsk.thread_info.cpu * tmp1 = __per_cpu_offset[tmp2] + * tpidr_el1 = tmp1 */ adr_l \tmp1, __per_cpu_offset ldr w\tmp2, [\tsk, #TSK_TI_CPU] diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 1c40cbe733dbc..828780124ee39 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -88,6 +88,11 @@ u64 __cacheline_aligned boot_args[4]; void __init smp_setup_processor_id(void) { + /* IAMROOT20 20240106 + * mpidr = MPIDR_EL1 & 0xff00ffffff + * mpidir Affinity level 0~3 정보를 가져옴. 
+ * http://jake.dothome.co.kr/smp_setup_processor_id + */ u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; set_cpu_logical_map(0, mpidr); diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e659cb6fded39..33d76d55dc372 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -140,6 +140,10 @@ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute */ #if __has_attribute(__externally_visible__) +/* IAMROOT20 20240106 + * __externally_visible__ : 파일에게 이 함수 또는 변수를 사용할 수 없음으로 + * 표시하지 않도록 이 함수 또는 변수를 사용한다고 알려줍니다 + */ # define __visible __attribute__((__externally_visible__)) #else # define __visible diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index f158b025c1750..0804142e59374 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,6 +25,9 @@ static __always_inline void *task_stack_page(const struct task_struct *task) static __always_inline unsigned long *end_of_stack(const struct task_struct *task) { + /* IAMROOT20 20240106 + * CONFIG_STACK_GROWSUP : 스택이 상향으로 push되는 경우에 사용 + */ #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; #else diff --git a/init/main.c b/init/main.c index af50044deed56..ee0b5f78196c8 100644 --- a/init/main.c +++ b/init/main.c @@ -877,6 +877,14 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } +/* IAMROOT20 20240106 + * asmlinkage : 어셈블리와 링크가 가능하다는 뜻 즉, 어셈블리어로 짜여진 코드에서 + * 이 함수를 호출 할 수 있다는 뜻입니다. + * __visible : 링커가 이 함수를 생략하지 못하도록 하는 것 같습니다. + * __no_sanitize_address : C/C++에서 메모리 버그를 감지하기 위한 감지기(detector)를 + * 사용하지 않겠다. + * __noreturn : 이 함수는 리턴하지 않는다. 
+ */ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(void) { char *command_line; @@ -884,6 +892,7 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v set_task_stack_end_magic(&init_task); smp_setup_processor_id(); + /* IAMROOT20_END 20240106 */ debug_objects_early_init(); init_vmlinux_build_id(); diff --git a/kernel/fork.c b/kernel/fork.c index 41c964104b584..8184340aeb912 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1090,6 +1090,9 @@ int __weak arch_dup_task_struct(struct task_struct *dst, return 0; } +/* IAMROOT20 20240106 + * 스택의 맨 끝에 magic 코드를 써 넣는다. + */ void set_task_stack_end_magic(struct task_struct *tsk) { unsigned long *stackend; From db2788ea39bbea48ed2f075e7880045066dd1482 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 13 Jan 2024 09:01:41 +0000 Subject: [PATCH 025/104] IAMROOT20 20240113 - Add comment to 'spinlock_types_raw.h' --- include/linux/spinlock_types_raw.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h index 91cb36b65a170..7c362e77614c2 100644 --- a/include/linux/spinlock_types_raw.h +++ b/include/linux/spinlock_types_raw.h @@ -11,6 +11,14 @@ #include +/* IAMROOT20 20240113 + * raw_lock : 스핀락. 
값이 0 이상인지 체크를 통해, 락이 걸려 있는지 확인 + * magic : 스핀락이 만들어질 때 설정되는 랜덤한 정수 + * owner : 어떤 프로세스에서 실행되는지에 대한 정보 + * owner_cpu : 몇 번째 CPU에서 실행되는지에 대한 정보 + * dep_map : 구조체가 현재 접근중인 lock을 lock_class에 연결 + * - https://m.blog.naver.com/nawoo/220913522363 + */ typedef struct raw_spinlock { arch_spinlock_t raw_lock; #ifdef CONFIG_DEBUG_SPINLOCK From a6c829820af1cf186a72bfa72875bffae8448851 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 13 Jan 2024 09:02:39 +0000 Subject: [PATCH 026/104] IAMROOT20 20240113 - Add comments to 'debugobjects.c' --- lib/debugobjects.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 984985c39c9b0..8ef22e6b6f759 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -56,6 +56,9 @@ static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; +/* IAMROOT20 20240113 + * __initdata : 커널 초기화 이후 해제되는 영역에 데이터를 배치함으로써 메모리 영역 확보 +*/ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); @@ -1323,6 +1326,10 @@ static inline void debug_objects_selftest(void) { } * the static object pool objects into the poll list. After this call * the object tracker is fully operational. 
*/ +/* IAMROOT20 20240113 + * ODEBUG_HASH_SIZE : 16K + * ODEBUG_POOL_SIZE : 1024 +*/ void __init debug_objects_early_init(void) { int i; From 6166582c6f468a1a2bbc7d5213b07f78b1d87cb2 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 13 Jan 2024 09:03:04 +0000 Subject: [PATCH 027/104] IAMROOT20 20240113 - Add comments to 'head.S' --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index ee0b5f78196c8..4d6bc34c789a1 100644 --- a/init/main.c +++ b/init/main.c @@ -892,7 +892,7 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v set_task_stack_end_magic(&init_task); smp_setup_processor_id(); - /* IAMROOT20_END 20240106 */ + /* IAMROOT20_END 20240106 */ /* IAMROOT20_START 20240113 */ debug_objects_early_init(); init_vmlinux_build_id(); From a03e864959932facef431a8d65a979037a39bb27 Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 13 Jan 2024 22:00:24 +0900 Subject: [PATCH 028/104] IAMROOT20 20240113 Add comments for init_vmlinux_build_id, cgroup_init_early --- arch/arm64/kernel/setup.c | 3 +++ include/linux/cgroup-defs.h | 5 +++++ init/main.c | 2 +- kernel/cgroup/cgroup.c | 8 ++++++++ lib/buildid.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 828780124ee39..19d816bc1e691 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -291,6 +291,9 @@ static int __init reserve_memblock_reserved_regions(void) } arch_initcall(reserve_memblock_reserved_regions); +/* IAMROOT20 20240113 + * __cpu_logical_map[0] = 0(mpidr & 0xff00ffffff) + */ u64 __cpu_logical_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = INVALID_HWID }; u64 cpu_logical_map(unsigned int cpu) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a0d5466c7be1..0f689acd5bf4b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -42,6 +42,11 @@ struct poll_table_struct; #define SUBSYS(_x) _x ## _cgrp_id, enum cgroup_subsys_id { #include + /* IAMROOT20 20240113 + * SUBSYS(cpuset) --> cpuset_cgrp_id, + * SUBSYS(cpu) --> cpu_cgrp_id, + * SUBSYS(memory) --> memory_cgrp_id, + */ CGROUP_SUBSYS_COUNT, }; #undef SUBSYS diff --git a/init/main.c b/init/main.c index ee0b5f78196c8..4d6bc34c789a1 100644 --- a/init/main.c +++ b/init/main.c @@ -892,7 +892,7 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v set_task_stack_end_magic(&init_task); smp_setup_processor_id(); - /* IAMROOT20_END 20240106 */ + /* IAMROOT20_END 20240106 */ /* IAMROOT20_START 20240113 */ debug_objects_early_init(); init_vmlinux_build_id(); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4d42f0cbc11ea..78fd85fc3a3da 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -129,6 +129,13 @@ static struct workqueue_struct *cgroup_destroy_wq; #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, struct cgroup_subsys *cgroup_subsys[] = { #include + /* IAMROOT20 20240113 + * SUBSYS(cpuset) --> [cpuset_cgrp_id] = &cpuset_cgrp_subsys, + * SUBSYS(cpu) --> [cpu_cgrp_id] = &cpu_cgrp_subsys, + * SUBSYS(cpuacct) --> [cpuacct_cgrp_id] = & cpuacct_cgrp_subsys, + * SUBSYS(io) --> [io_cgrp_id] = &io_cgrp_subsys, + * SUBSYS(memory) --> [memory_cgrp_id] = &memory_cgrp_subsys, + */ }; #undef SUBSYS @@ -6052,6 +6059,7 @@ int __init cgroup_init_early(void) ctx.root = &cgrp_dfl_root; init_cgroup_root(&ctx); + /* IAMROOT20_END 20240113 */ cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); diff --git a/lib/buildid.c b/lib/buildid.c index e3a7acdeef0ed..5be99b6c8beee 100644 --- a/lib/buildid.c +++ 
b/lib/buildid.c @@ -20,6 +20,25 @@ static int parse_build_id_buf(unsigned char *build_id, { Elf32_Word note_offs = 0, new_offs; + /* IAMROOT20 20240113 + * sizeof(Elf32_Nhdr) = 12 + * + * exam) + * +-------------------------+ + * |<--------Elf32_Nhdr----->| + * |namesz |descsz | type | + * +-------------------------+ + * 04000000 14000000 03000000 474e5500 ............GNU. + * |<---------------------- build id - --------------- + * b752b23a ddbcb03d 42316e26 804bbcba .R.:...=B1n&.K.. + * |------>| + * 415c0cb6 06000000 04000000 01010000 A\.............. + * 4c696e75 78000000 00000000 06000000 Linux........... + * 01000000 00010000 4c696e75 78000000 ........Linux... + * 00000000 + * + * build_id = b752b23a ddbcb03d 42316e26 804bbcba 415c0cb6 + */ while (note_offs + sizeof(Elf32_Nhdr) < note_size) { Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); @@ -28,10 +47,16 @@ static int parse_build_id_buf(unsigned char *build_id, !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 && nhdr->n_descsz <= BUILD_ID_SIZE_MAX) { + /* IAMROOT20 20240113 + * Elf32_Nhdr과 name 다음에 있는 build id를 build_id에 복사한다. + */ memcpy(build_id, note_start + note_offs + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); + /* IAMROOT20 20240113 + * build_id 남은 부분을 0으로 셋팅한다. + */ memset(build_id + nhdr->n_descsz, 0, BUILD_ID_SIZE_MAX - nhdr->n_descsz); if (size) @@ -182,6 +207,12 @@ unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init; */ void __init init_vmlinux_build_id(void) { + /* IAMROOT20 20240113 + * __weak : https://kldp.org/node/40383 + * 같은 이름의 심볼이 있으면 weak symbol이 strong symbol에게 overriding이 되게 하는 것입니다. + * 예를 들어 shared library에 a라는 weak symbol이 있을 때 이를 사용하는 프로그램에서 a라는 + * 심볼이 있으면 프로그램에서는 shared library가 아닌 자신에 있는 것을 사용하게 됩니다. 
+ */ extern const void __start_notes __weak; extern const void __stop_notes __weak; unsigned int size = &__stop_notes - &__start_notes; From c5ffc13bf4268aa1d487eea59a8f646d5866cdd2 Mon Sep 17 00:00:00 2001 From: fehead Date: Sat, 20 Jan 2024 20:31:59 +0900 Subject: [PATCH 029/104] IAMROOT20 20240113 Add comments for cgroup_init_early --- include/linux/cgroup-defs.h | 5 +++++ include/linux/compiler_types.h | 10 ++++++++++ include/linux/container_of.h | 3 +++ include/linux/err.h | 18 ++++++++++++++++++ include/linux/idr.h | 4 ++++ include/linux/rcupdate.h | 23 +++++++++++++++++++++++ kernel/cgroup/cgroup.c | 29 +++++++++++++++++++++++++++-- kernel/cgroup/cpuset.c | 4 ++++ 8 files changed, 94 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0f689acd5bf4b..df3922f6cc070 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -246,6 +246,11 @@ struct css_set { * css_set_rwsem, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ + /* IAMROOT20 20240120 + * 이 cgroup 그룹을 사용하여 실행 중인 모든 작업을 나열합니다. + * mg_tasks는 이 cset에 속하지만 마이그레이션되거나 마이그레이션되는 과정에 있는 작업을 나열합니다. + * css_set_rwsem으로 보호되지만 마이그레이션 중에 작업이 mg_tasks로 이동되면 cgroup_mutex를 유지하는 동안 안전하게 읽을 수 있습니다. + */ struct list_head tasks; struct list_head mg_tasks; struct list_head dying_tasks; diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 547ea1ff806eb..771786bc064f3 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -16,6 +16,16 @@ #endif /* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ +/* IAMROOT20 20240120 + * sparse 설명 : + * https://kldp.org/node/96789 + * https://pinocc.tistory.com/144 + * https://www.kernel.org/doc/Documentation/dev-tools/sparse.rst + * https://en.wikipedia.org/wiki/Sparse + * https://sparse.docs.kernel.org/en/latest/annotations.html + * + * __force는 sparse 속성이 없더라도 경고를 내지 않는다. 
+ */ #ifdef __CHECKER__ /* address spaces */ # define __kernel __attribute__((address_space(0))) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 713890c867bea..45f849a931f27 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -15,6 +15,9 @@ * * WARNING: any const qualifier of @ptr is lost. */ +/* IAMROOT20 20240120 + * structure에 있는 member를 가지고 structure주소를 알아낸다. + */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ static_assert(__same_type(*(ptr), ((type *)0)->member) || \ diff --git a/include/linux/err.h b/include/linux/err.h index a139c64aef2ac..d79168a4a2980 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -15,10 +15,28 @@ * This should be a per-architecture thing, to allow different * error and pointer decisions. */ +/* IAMROOT20 20240120 + * 커널 포인터에는 중복된 정보가 있으므로 오류 코드나 동일한 반환 값을 가진 일반 + * 포인터를 반환할 수 있는 체계를 사용할 수 있습니다. + * 이는 다양한 오류 및 포인터 결정을 허용하기 위해 아키텍처별로 이루어져야 합니다. + */ #define MAX_ERRNO 4095 #ifndef __ASSEMBLY__ +/* IAMROOT20 20240120 + * x >= (unsigned long)-4095 + * -> x > 0xffff_ffff_ffff_f000 + * 커널영역주소는 0xffff_0000_0000_0000 ~ 0xffff_ffff_ffff_ffff + * -1(0xffff_ffff_ffff_ffff) ~ -4095(0xffff_ffff_ffff_f001)가 에러, 그 외에는 주소 + * + * 에러 번호는 1 ~ 34까지 할당되어 있으며 ERR_PTR함수를 호출할때 -를 붙여 호출한다 + * exam) ERR_PTR(-ENOMEM) + * 따라서 에러 번호는 -1 ~ -34까지 해당되며 unsigned long으로 바꾸면 + * 0xffff_ffff_ffff_ffff(-1) ~ 0xffff_ffff_ffff_ffde(-34)에 해당된다. + * + * include/uapi/asm-generic/errno-base.h 참고 + */ #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) static inline void * __must_check ERR_PTR(long error) diff --git a/include/linux/idr.h b/include/linux/idr.h index a0dce14090a9e..69146b5bf82b6 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -8,6 +8,10 @@ * Small id to pointer translation service avoiding fixed sized * tables. */ +/* IAMROOT20 20240120 + * IDA, IDR 한글 설명. 
+ * https://velog.io/@mythos/Linux-Tutorial-23-IDRID-Radix-IDAID-Allocator + */ #ifndef __IDR_H__ #define __IDR_H__ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dcd2cf1e8326d..4d32d5c4c6d78 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -429,6 +429,15 @@ static inline void rcu_preempt_sleep_check(void) { } */ #ifdef __CHECKER__ +/* IAMROOT20 20240120 + * rcu_check_sparse(p, __rcu) + * --> p에 __rcu 속성이 있는지 체크한다. + * + * sparse 설명 : + * https://kldp.org/node/96789 + * https://www.kernel.org/doc/Documentation/dev-tools/sparse.rst + * https://en.wikipedia.org/wiki/Sparse + */ #define rcu_check_sparse(p, space) \ ((void)(((typeof(*p) space *)p) == p)) #else /* #ifdef __CHECKER__ */ @@ -482,6 +491,16 @@ static inline void rcu_preempt_sleep_check(void) { } * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. */ +/* IAMROOT20 20240120 + * __force는 v에 __rcu속성이 없더라도 타입케이스팅을 한다. + * exam) + * int a = 10; + * int * v = &a; + * int __rcu r = RCU_INITIALIZER(v); + * -> (typeof(*v) __force __rcu *)(v) + * -> (int __force __rcu *)(v) + * --> int __rcu r = (int __force __rcu *)v; + */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** @@ -933,6 +952,10 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no * ordering guarantees for either the CPU or the compiler. */ +/* IAMROOT20 20240120 + * rcu_check_sparse p에 __rcu 속성이 있는지 체크한다. 
+ * p = RCU_INITIALIZER(v) + */ #define RCU_INIT_POINTER(p, v) \ do { \ rcu_check_sparse(p, __rcu); \ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 78fd85fc3a3da..bf6a67c0afb34 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -132,7 +132,7 @@ struct cgroup_subsys *cgroup_subsys[] = { /* IAMROOT20 20240113 * SUBSYS(cpuset) --> [cpuset_cgrp_id] = &cpuset_cgrp_subsys, * SUBSYS(cpu) --> [cpu_cgrp_id] = &cpu_cgrp_subsys, - * SUBSYS(cpuacct) --> [cpuacct_cgrp_id] = & cpuacct_cgrp_subsys, + * SUBSYS(cpuacct) --> [cpuacct_cgrp_id] = &cpuacct_cgrp_subsys, * SUBSYS(io) --> [io_cgrp_id] = &io_cgrp_subsys, * SUBSYS(memory) --> [memory_cgrp_id] = &memory_cgrp_subsys, */ @@ -142,6 +142,13 @@ struct cgroup_subsys *cgroup_subsys[] = { /* array of cgroup subsystem names */ #define SUBSYS(_x) [_x ## _cgrp_id] = #_x, static const char *cgroup_subsys_name[] = { +/* IAMROOT20 20240120 + SUBSYS(cpuset) -> [cpuset_cgrp_id] = "cpuset", + SUBSYS(cpu) -> [cpu_cgrp_id] = "cpu", + SUBSYS(cpuacct) -> [cpuacct_cgrp_id] = "cpuacct", + SUBSYS(io) -> [io_cgrp_id] = "io", + SUBSYS(memory) -> [memory_cgrp_id] = "memory", +*/ #include }; #undef SUBSYS @@ -208,6 +215,10 @@ static u64 css_serial_nr_next = 1; * These bitmasks identify subsystems with specific features to avoid * having to do iterative checks repeatedly. */ +/* IAMROOT20 20240120 + * 이러한 비트마스크는 반복적인 검사를 반복적으로 수행할 필요가 없도록 특정 + * 기능을 갖춘 하위 시스템을 식별합니다. 
+ */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; static u16 have_release_callback __read_mostly; @@ -5508,6 +5519,10 @@ static int online_css(struct cgroup_subsys_state *css) lockdep_assert_held(&cgroup_mutex); + /* IAMROOT20 20240120 + * ss -> cpuset_cgrp_subsys 이면 + * css_online = cpuset_css_online + */ if (ss->css_online) ret = ss->css_online(css); if (!ret) { @@ -6038,6 +6053,11 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't * need to invoke fork callbacks here. */ + /* IAMROOT20 20240120 + * + * 시스템 부팅 시 모든 하위 시스템이 등록되기 전에 포크된 작업이 없으므로 + * 여기서 포크 콜백을 호출할 필요가 없습니다. + */ BUG_ON(!list_empty(&init_task.tasks)); BUG_ON(online_css(css)); @@ -6059,11 +6079,16 @@ int __init cgroup_init_early(void) ctx.root = &cgrp_dfl_root; init_cgroup_root(&ctx); - /* IAMROOT20_END 20240113 */ + /* IAMROOT20_END 20240113 */ /* IAMROOT20_START 20240120 */ cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); + /* IAMROOT20 20240120 + * + * for (i = 0; i < CGROUP_SUBSYS_COUNT && + * ((ss = cgroup_subsys[i]) || true); i++) + */ for_each_subsys(ss, i) { WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id, "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n", diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e4ca2dd2b7648..c838b7113b84e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3147,6 +3147,10 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css) static int cpuset_css_online(struct cgroup_subsys_state *css) { + /* IAMROOT20 20240120 + * cs = &top_cpuset + * parent = NULL + */ struct cpuset *cs = css_cs(css); struct cpuset *parent = parent_cs(cs); struct cpuset *tmp_cs; From 47c57e4f5eeb85d3b2a6f3e08daaa4fdfe26b76c Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sun, 21 Jan 2024 20:53:42 
+0900 Subject: [PATCH 030/104] IAMROOT20 20240120 Add comments to local_irq_disable, boot_cpu_init --- arch/arm64/include/asm/irqflags.h | 10 ++++++++++ include/linux/irqflags.h | 8 ++++++++ kernel/cpu.c | 9 +++++++++ 3 files changed, 27 insertions(+) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index e0f5f6b73edd7..e6864a2d999d6 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -56,6 +56,9 @@ static inline void arch_local_irq_enable(void) } } +/* IAMROOT20 20240120 + msr daifset, #3 명령어로 irq disable + */ static __always_inline void __daif_local_irq_disable(void) { barrier(); @@ -75,6 +78,13 @@ static __always_inline void __pmr_local_irq_disable(void) barrier(); } +/* IAMROOT20 20240120 + arch로 시작하는 함수이므로 arch/arm64 경로로 찾아옴 + gicv3의 PMR 기능을 사용할 경우 __pmr_local_irq_disable 함수를 호출하고 + 아닐 경우 __daif_local_irq_disable 함수를 호출 + PMR, PSEUDO_NMI 관련 내용은 추후에 다시 알아보기로 하고 daif로 따라감 + https://github.com/iamroot18/5.10/blob/i515/arch/arm64/include/asm/ptrace.h + */ static inline void arch_local_irq_disable(void) { if (__irqflags_uses_pmr()) { diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5ec0fa71399e4..0a738052d2f8e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -170,6 +170,9 @@ extern void warn_bogus_irq_restore(void); /* * Wrap the arch provided IRQ routines to provide appropriate checks. 
*/ +/* IAMROOT20 20240120 + raw_local_irq_disable은 arch_local_irq_disable로 치환됨 + */ #define raw_local_irq_disable() arch_local_irq_disable() #define raw_local_irq_enable() arch_local_irq_enable() #define raw_local_irq_save(flags) \ @@ -240,6 +243,11 @@ extern void warn_bogus_irq_restore(void); #else /* !CONFIG_TRACE_IRQFLAGS */ #define local_irq_enable() do { raw_local_irq_enable(); } while (0) +/* IAMROOT20 20240120 + local_irq_disable은 raw_local_irq_disable로 치환됨 + local: 현재 PE를 의미함 + remote: 현재 PE를 제외한 나머지를 의미함 + */ #define local_irq_disable() do { raw_local_irq_disable(); } while (0) #define local_irq_save(flags) do { raw_local_irq_save(flags); } while (0) #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) diff --git a/kernel/cpu.c b/kernel/cpu.c index f4a2c5845bcbd..43d2d720e1172 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2698,6 +2698,15 @@ void set_cpu_online(unsigned int cpu, bool online) } } +/* IAMROOT20 20240120 + boot core의 online, active, present, possible bit를 set + __cpu_possible_mask: 디바이스 트리에서 파싱된 CPU 를 의미함 + __cpu_present_mask: 실제로 CPU 가 물리적으로 존재하는 것을 의미함 + __cpu_online_mask: 부팅 완료를 나타냄 + __cpu_active_mask: 스케줄러가 바라보는 core 상태를 나타냄 + https://yohda.tistory.com/entry/%EC%BB%A4%EB%84%90%ED%8C%8C%EC%9B%8C-Linux-CPU-core-%EC%A0%84%EC%9B%90-%EA%B4%80%EB%A6%AC5-CPU-control-hotplug%EC%9E%91%EC%84%B1%EC%A4%91 + */ +/* IAMROOT20_END 20240120 */ /* * Activate the first processor. 
*/ From b9833ae09aaad3534a77ba84a46f5d1e8a4cb7c8 Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 27 Jan 2024 19:09:01 +0900 Subject: [PATCH 031/104] IAMROOT20 20240120 Add comments to boot_cpu_init --- arch/arm64/include/asm/percpu.h | 7 ++++++ arch/arm64/include/asm/smp.h | 6 +++++ arch/arm64/kernel/smp.c | 3 +++ include/asm-generic/percpu.h | 4 ++++ include/linux/bitops.h | 12 ++++++++++ include/linux/compiler.h | 4 ++++ include/linux/cpumask.h | 6 +++++ include/linux/percpu-defs.h | 42 +++++++++++++++++++++++++++++++++ include/linux/smp.h | 7 ++++++ include/linux/types.h | 5 ++++ include/uapi/linux/const.h | 6 +++++ kernel/cpu.c | 4 ++++ 12 files changed, 106 insertions(+) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index b9ba19dbdb694..42a5d99cef233 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -29,6 +29,10 @@ static inline unsigned long __hyp_my_cpu_offset(void) return read_sysreg(tpidr_el2); } +/* IAMROOT20 20240127 + * mrs off, tpidr_el1 + * tpidr_el1값을 가져와서 반환 + */ static inline unsigned long __kern_my_cpu_offset(void) { unsigned long off; @@ -49,6 +53,9 @@ static inline unsigned long __kern_my_cpu_offset(void) #ifdef __KVM_NVHE_HYPERVISOR__ #define __my_cpu_offset __hyp_my_cpu_offset() #else +/* IAMROOT20 20240127 + * tpidr_el1값을 반환 + */ #define __my_cpu_offset __kern_my_cpu_offset() #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f2d26235bfb4e..85d3c6958e946 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -31,6 +31,9 @@ #include #include +/* IAMROOT20 20240127 + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); /* @@ -40,6 +43,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * And we can't use this_cpu_ptr() either, as that winds up recursing back * here under 
CONFIG_DEBUG_PREEMPT=y. */ +/* IAMROOT20 20240127 + * ex) &cpu_number + 0 + */ #define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d00d4cbb31b16..87d731eca088d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -53,6 +53,9 @@ #include +/* IAMROOT20 20240127 + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 6432a7fade913..ad3d7282b3a16 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,6 +41,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * translations for raw_cpu_ptr(). */ #ifndef arch_raw_cpu_ptr +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) __my_cpu_offset(0) + &cpu_number + 0 + */ #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2ba557e067fe6..025a2d017645e 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -15,7 +15,19 @@ # define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) #endif +/* IAMROOT20 20240127 + * ex) type(long) + * (sizeof(long) * BITS_PER_BYTE) + * (8 * 8) + * (64) + */ #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +/* IAMROOT20 20240127 + * ex) nr(256) + * __KERNEL_DIV_ROUND_UP(256, BITS_PER_TYPE(long)) + * __KERNEL_DIV_ROUND_UP(256, 64) + * (4) + */ #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7779a18b24fc..ae759bd227e4e 100644 --- a/include/linux/compiler.h +++ 
b/include/linux/compiler.h @@ -163,6 +163,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #ifndef RELOC_HIDE +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) off(0) + * (typeof(&cpu_number)) (&cpu_number + (0)) + */ # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ca736b05ec7b0..98e861ebec408 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -16,6 +16,12 @@ #include /* Don't assign or return these: may not be this big! */ +/* IAMROOT20 20240127 + * __cpu_possible_mask, __cpu_online_mask, __cpu_present_mask, __cpu_active_mask는 모두 struct cpumask 형으로 선언되어 있음 + * typedef struct cpumask { + * unsigned long bits[4]; + * } cpumask_t; + */ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /** diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index e60727be79c44..5c55a22407423 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -46,6 +46,13 @@ * linkage errors occur due the compiler generating the wrong code to access * that section. */ +/* IAMROOT20 20240127 + * ex) sec("..read_mostly") + * __percpu __attribute__((section(PER_CPU_BASE_SECTION "..read_mostly"))) + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(PER_CPU_BASE_SECTION "..read_mostly"))) + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) + * PER_CPU_ATTRIBUTES는 arm64에서 define되어 있지 않음 + */ #define __PCPU_ATTRS(sec) \ __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES @@ -97,9 +104,19 @@ /* * Normal declaration and definition macros. 
*/ +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) sec("..read_mostly") + * extern __PCPU_ATTRS("..read_mostly") __typeof__(int) cpu_number + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_ATTRS(sec) __typeof__(type) name +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) sec("..read_mostly") + * __PCPU_ATTRS("..read_mostly") __typeof__(int) cpu_number + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) __typeof__(type) name #endif @@ -165,9 +182,19 @@ /* * Declaration/definition used for per-CPU variables that must be read mostly. */ +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) + * DECLARE_PER_CPU_SECTION(int, cpu_number, "..read_mostly") + * extern __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DECLARE_PER_CPU_READ_MOSTLY(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly") +/* IAMROOT20 20240127 + * ex) type(int) name(cpu_number) + * DEFINE_PER_CPU_SECTION(int, cpu_number, "..read_mostly") + * __attribute__((noderef, address_space(__percpu))) __attribute__((section(".data..percpu" "..read_mostly"))) __typeof__(int) cpu_number + */ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..read_mostly") @@ -214,6 +241,10 @@ * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array.
*/ +/* IAMROOT20 20240127 + * https://stackoverflow.com/questions/30831335/verify-pcpu-ptr-function-in-linux-kernel-what-does-it-do + * http://www.iamroot.org/xe/index.php?mid=Programming&document_srl=208290 + */ #define __verify_pcpu_ptr(ptr) \ do { \ const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ @@ -227,6 +258,13 @@ do { \ * to prevent the compiler from making incorrect assumptions about the * pointer value. The weird cast keeps both GCC and sparse happy. */ +/* IAMROOT20 20240127 + * ex) __p(&cpu_number) __offset(0) + * RELOC_HIDE((typeof(*(&cpu_number)) __kernel __force *)(&cpu_number), (0)) + * RELOC_HIDE((typeof(*(&cpu_number)) __attribute__((address_space(0))) __attribute__((force)) *)(&cpu_number), (0)) + * 요약하면 RELOC_HIDE((int *) &cpu_number, 0) + * &cpu_number + 0 + */ #define SHIFT_PERCPU_PTR(__p, __offset) \ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) @@ -236,6 +274,10 @@ do { \ SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))); \ }) +/* IAMROOT20 20240127 + * ex) ptr(&cpu_number) + * &cpu_number + 0 + */ #define raw_cpu_ptr(ptr) \ ({ \ __verify_pcpu_ptr(ptr); \ diff --git a/include/linux/smp.h b/include/linux/smp.h index 91ea4a67f8ca2..a1bfff834a9b2 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -261,6 +261,10 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable.
*/ #ifndef __smp_processor_id +/* IAMROOT20 20240127 + * ex) x(void로 추정됨) + * &cpu_number + 0 + */ #define __smp_processor_id(x) raw_smp_processor_id(x) #endif @@ -268,6 +272,9 @@ static inline int get_boot_cpu_id(void) extern unsigned int debug_smp_processor_id(void); # define smp_processor_id() debug_smp_processor_id() #else +/* IAMROOT20 20240127 + * &cpu_number + 0 + */ # define smp_processor_id() __smp_processor_id() #endif diff --git a/include/linux/types.h b/include/linux/types.h index 688fb943556a1..78c7f5388ac9a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -7,6 +7,11 @@ #ifndef __ASSEMBLY__ +/* IAMROOT20 20240127 + * ex) name(bits) bits(256) + * unsigned long bits[BITS_TO_LONGS(256)] + * unsigned long bits[4] + */ #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index a429381e7ca50..0e22fbd3898af 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -31,6 +31,12 @@ #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +/* IAMROOT20 20240127 + * ex) n(256) d(64) + * (((256) + (64) - 1) / (64)) + * ROUND_UP(256/64) + * (4) + */ #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif /* _UAPI_LINUX_CONST_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 43d2d720e1172..e4136064a80bd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2707,11 +2707,15 @@ void set_cpu_online(unsigned int cpu, bool online) https://yohda.tistory.com/entry/%EC%BB%A4%EB%84%90%ED%8C%8C%EC%9B%8C-Linux-CPU-core-%EC%A0%84%EC%9B%90-%EA%B4%80%EB%A6%AC5-CPU-control-hotplug%EC%9E%91%EC%84%B1%EC%A4%91 */ /* IAMROOT20_END 20240120 */ +/* IAMROOT20_START 20240127 */ /* * Activate the first processor. 
*/ void __init boot_cpu_init(void) { + /* IAMROOT20 20240127 + * 현재 실행중인 코어의 논리번호를 가져옴 + */ int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ From c1a150e4e55fc19c337a64ec61ccf5dd0ebcc397 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 27 Jan 2024 22:12:43 +0900 Subject: [PATCH 032/104] IAMROOT20 20240127 Add comments --- include/asm-generic/bitops/atomic.h | 20 +++++++++++++++++++ .../asm-generic/bitops/instrumented-atomic.h | 4 ++++ include/linux/atomic/atomic-arch-fallback.h | 4 ++++ include/linux/cpumask.h | 14 +++++++++++++ init/main.c | 1 + 5 files changed, 43 insertions(+) diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 71ab4ba9c25d1..90a7fe636785f 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -36,10 +36,30 @@ static __always_inline int arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) { long old; + /* IAMROOT20 20240127 + * BIT_WORD() : 몇 번째 word(long)을 쓸 것인지 + * BIT_MASK() : word 내에서 해당 bit를 set + */ unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); + /* IAMROOT20 20240127 + * ex) + * 1) old값에 mask bit가 set되어 있지 않은 경우 + * old = 0001 + * mask = 0010 + * + * old & mask = 0000 + * !!(old & mask) -> false + * + * 2) old값에 mask bit가 set되어 있는 경우 + * old = 0011 + * mask = 0010 + * + * old & mask = 0010 + * !!(old & mask) -> true + */ return !!(old & mask); } diff --git a/include/asm-generic/bitops/instrumented-atomic.h b/include/asm-generic/bitops/instrumented-atomic.h index 4225a8ca9c1a0..fafa7a5d371a4 100644 --- a/include/asm-generic/bitops/instrumented-atomic.h +++ b/include/asm-generic/bitops/instrumented-atomic.h @@ -67,6 +67,10 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { + /* IAMROOT20 20240127 + * KCSAN - Kernel
Concurrency Sanitizer + * - 커널 공간에서 동작하는 동적 data race 탐지 기능 + */ kcsan_mb(); instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); return arch_test_and_set_bit(nr, addr); diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index a6e4437c5f369..19993804fb86d 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -2162,6 +2162,10 @@ arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 ret; __atomic_pre_full_fence(); + /* IAMROOT20 20240127 + * arch_atomic64_fetch_or_relaxed(i, v) + * => *v |= i, 이전 v값을 return한다 + */ ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 98e861ebec408..a2aea753d229b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -80,6 +80,17 @@ static inline void set_nr_cpu_ids(unsigned int nr) * optimization comes from being able to potentially use a compile-time * constant instead of a run-time generated exact number of CPUs.
*/ +/* IAMROOT20 20240127 + * optimization을 위해 NR_CPUS 값에 따라 small/large_cpumask_bits를 설정 + * ex) BITS_PER_LONG = 64 + * 1) NR_CPUS <= 64 + * small, large -> NR_CPUS + * 2) 64 < NR_CPUS <= 4*64(256) + * small -> nr_cpu_ids + * large -> NR_CPUS + * 3) 256 < NR_CPUS + * small, large -> nr_cpu_ids + */ #if NR_CPUS <= BITS_PER_LONG #define small_cpumask_bits ((unsigned int)NR_CPUS) #define large_cpumask_bits ((unsigned int)NR_CPUS) @@ -521,6 +532,9 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum */ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { + /* IAMROOT20 20240127 + * test_and_set_bit(cpu, cpumask->bits) + */ return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } diff --git a/init/main.c b/init/main.c index 4d6bc34c789a1..4e1050b1bfafb 100644 --- a/init/main.c +++ b/init/main.c @@ -906,6 +906,7 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v * enable them. */ boot_cpu_init(); + /* IAMROOT20_END 20240127 */ page_address_init(); pr_notice("%s", linux_banner); early_security_init(); From af6e2e3256236c4eafebc268e28d73ab20eb3c60 Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 3 Feb 2024 20:26:42 +0900 Subject: [PATCH 033/104] IAMROOT20 20240203 Add comments to early_security_init --- include/asm-generic/vmlinux.lds.h | 7 +++++++ include/linux/lsm_hooks.h | 17 +++++++++++++++++ init/main.c | 9 +++++++++ security/lockdown/lockdown.c | 17 +++++++++++++++++ security/security.c | 24 ++++++++++++++++++++++++ 5 files changed, 74 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da9e5629ea43d..19a7fe636dd97 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -304,6 +304,13 @@ . = ALIGN(8); \ BOUNDED_SECTION_PRE_LABEL(.lsm_info.init, _lsm_info, __start, __end) +/* IAMROOT20 20240203 + * EARLY_LSM_TABLE() + * .
= ALIGN(8); + * __start_early_lsm_info=.; + * KEEP(*(.early_lsm_info.init)) + * __end_early_lsm_info=.; + */ #define EARLY_LSM_TABLE() \ . = ALIGN(8); \ BOUNDED_SECTION_PRE_LABEL(.early_lsm_info.init, _early_lsm_info, __start, __end) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ab2b2fafa4a45..0b9e6079030f5 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -29,12 +29,29 @@ #include #include +/* IAMROOT20 20240203 + * union security_list_options { + * int (*binder_set_context_mgr)(const struct cred *mgr); + * int (*binder_transaction)(const struct cred *from,const struct cred *to); + * ... + * }; + */ union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK }; +/* IAMROOT20 20240203 + * struct security_hook_heads { + * struct hlist_head binder_set_context_mgr; + * struct hlist_head binder_transaction; + * .... + * } __attribute__((__designated_init__)) __attribute__((randomize_layout)); + * __attribute__((__designated_init__)): 지정된 초기화 사용 + * __attribute__((randomize_layout)): 구조체 필드 배치 무작위화 + * 구조체 필드 위치가 랜덤하게 결정되면 초기화시 의도했던대로 안되기 때문에 지정된 초기화 옵션을 사용하는 것으로 보임 + */ struct security_hook_heads { #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; #include "lsm_hook_defs.h" diff --git a/init/main.c b/init/main.c index 4e1050b1bfafb..f1086425b1cbe 100644 --- a/init/main.c +++ b/init/main.c @@ -907,7 +907,16 @@ asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(v */ boot_cpu_init(); /* IAMROOT20_END 20240127 */ + /* IAMROOT20_START 20240203 */ + /* IAMROOT20 20240203 + * 32bit 시스템에서는 1:1 매핑이 일부만 가능하기 때문에 ZONE_NORMAL을 초과하는 메모리가 이 영역을 사용한다. + * 64bit 시스템에서는 모든 물리 메모리가 1:1 매핑이 가능하므로 ZONE_HIGHMEM을 사용하지 않는다. 
+ */ page_address_init(); + /* IAMROOT20 20240203 + * linux_banner 출력 + * ex) Linux version 6.6.10-1-rt19-MANJARO (builduser@fv-az1491-220) (gcc (GCC) 13.2.1 20230801, GNU ld (GNU Binutils) 2.41.0) #1 SMP PREEMPT_RT Wed Jan 10 09:41:23 UTC 2024 + */ pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 68d19632aeb72..d4d62f904500a 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -71,6 +71,16 @@ static int lockdown_is_locked_down(enum lockdown_reason what) return 0; } +/* IAMROOT20 20240203 + * static struct security_hook_list lockdown_hooks[] __ro_after_init = { + * { + * .head = &security_hook_heads.locked_down, + * .hook = { + * .locked_down = lockdown_is_locked_down + * } + * } + * } + */ static struct security_hook_list lockdown_hooks[] __ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), }; @@ -157,6 +167,13 @@ static int __init lockdown_secfs_init(void) core_initcall(lockdown_secfs_init); +/* IAMROOT20 20240203 + * ex) DEFINE_EARLY_LSM(lockdown) = { + .name = "lockdown", + .init = lockdown_lsm_init, + }; + static struct lsm_info __early_lsm_lockdown __used __section(".early_lsm_info.init") __aligned(sizeof(unsigned long)) + */ #ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY DEFINE_EARLY_LSM(lockdown) = { #else diff --git a/security/security.c b/security/security.c index d5ff7ff45b776..6bb0b34bacb1e 100644 --- a/security/security.c +++ b/security/security.c @@ -75,6 +75,9 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = { [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality", }; +/* IAMROOT20 20240203 + * security_hook_heads 선언부 + */ struct security_hook_heads security_hook_heads __ro_after_init; static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain); @@ -240,6 +243,9 @@ static void __init initialize_lsm(struct lsm_info *lsm) int ret; init_debug("initializing %s\n", lsm->name); + /* 
IAMROOT20 20240203 + * ex) lsm->init(): lockdown_lsm_init() + */ ret = lsm->init(); WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret); } @@ -399,15 +405,27 @@ static void __init ordered_lsm_init(void) kfree(ordered_lsms); } +/* IAMROOT20 20240203 + * https://scienceon.kisti.re.kr/commons/util/originalView.do?cn=JAKO200311921893007&oCn=JAKO200311921893007&dbt=JAKO&journal=NJOU00291864 + * https://lesstif.gitbook.io/web-service-hardening/selinux + */ int __init early_security_init(void) { struct lsm_info *lsm; + /* IAMROOT20 20240203 + * security_hook_heads.binder_set_context_mgr.first = NULL; + * security_hook_heads.binder_transaction.first = NULL; + * ... + */ #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ INIT_HLIST_HEAD(&security_hook_heads.NAME); #include "linux/lsm_hook_defs.h" #undef LSM_HOOK + /* IAMROOT20 20240203 + * ex) lsm -> __early_lsm_lockdown + */ for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) { if (!lsm->enabled) lsm->enabled = &lsm_enabled_true; @@ -522,6 +540,9 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count, int i; for (i = 0; i < count; i++) { + /* IAMROOT20 20240203 + * ex) lockdown_hooks[0].lsm = "lockdown" + */ hooks[i].lsm = lsm; hlist_add_tail_rcu(&hooks[i].list, hooks[i].head); } @@ -530,6 +551,9 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count, * Don't try to append during early_security_init(), we'll come back * and fix this up afterwards. 
*/ + /* IAMROOT20 20240203 + * early_security_init() 로직에서는 수행되지 않음 + */ if (slab_is_available()) { if (lsm_append(lsm, &lsm_names) < 0) panic("%s - Cannot get early memory.\n", __func__); From fcf5c36fcd8704f034615605beaf1126fc849381 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 3 Feb 2024 13:05:11 +0000 Subject: [PATCH 034/104] IAMROOT20 - Add comment to 'cpufeature.c' --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7d7128c651614..b39a7b8539fe4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1629,6 +1629,7 @@ bool kaslr_requires_kpti(void) * E0PD does a similar job to KPTI so can be used instead * where available. */ + /* IAMROOT20_REVIEW_END 20240203 */ if (IS_ENABLED(CONFIG_ARM64_E0PD)) { u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); if (cpuid_feature_extract_unsigned_field(mmfr2, From bc91fff713ca700bc0f2a3029c3333e20873dbe1 Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 17 Feb 2024 19:07:02 +0900 Subject: [PATCH 035/104] IAMROOT20 20240217 Add comments to kaslr_requires_kpti --- arch/arm64/include/asm/memory.h | 3 +++ arch/arm64/kernel/cpufeature.c | 15 ++++++++++++++- mm/memblock.c | 2 +- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f83776d941dee..54c8b909bf9c9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -241,6 +241,9 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +/* IAMROOT20 20240217 + * kaslr_offset이 2MB 이상이면 kaslr이 enable되었다고 판단함 + */ static inline bool kaslr_enabled(void) { /* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b39a7b8539fe4..44803f8e664af 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1620,6 +1620,10 @@ has_useable_cnp(const struct
arm64_cpu_capabilities *entry, int scope) * state once the SMP CPUs are up and thus make the switch to non-global * mappings if required. */ +/* IAMROOT20 20240217 + * arm64 e0pd 기능을 사용할 수 있으면 kpti는 필요없다고 판단하여 false 반환 + * e0pd 기능을 사용할 수 없으면 kaslr이 켜져 있는지만 확인하여 true 반환 + */ bool kaslr_requires_kpti(void) { if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) @@ -1629,7 +1633,13 @@ bool kaslr_requires_kpti(void) * E0PD does a similar job to KPTI so can be used instead * where available. */ - /* IAMROOT20_REVIEW_END 20240203 */ + /* IAMROOT20_END 20240203 */ + /* IAMROOT20_START 20240217 */ + /* IAMROOT20 20240217 + * ID_AA64MMFR2_EL1.E0PD를 읽어와서 E0PD 기능을 지원하는지 확인 + * 만약 지원하면 false를 반환 + * 만약 지원하지 않으면 계속 진행 + */ if (IS_ENABLED(CONFIG_ARM64_E0PD)) { u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); if (cpuid_feature_extract_unsigned_field(mmfr2, @@ -1641,6 +1651,9 @@ bool kaslr_requires_kpti(void) * Systems affected by Cavium erratum 24756 are incompatible * with KPTI. */ + /* IAMROOT20 20240217 + * 특정 벤더사에 대한 예외 상황으로 넘어감 + */ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { extern const struct midr_range cavium_erratum_27456_cpus[]; diff --git a/mm/memblock.c b/mm/memblock.c index 3feafea06ab21..2c85f06e55c07 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,7 +26,7 @@ #define INIT_PHYSMEM_REGIONS 4 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS -# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS #endif #ifndef INIT_MEMBLOCK_MEMORY_REGIONS From 7769ef13193a20a42f014119f49398dddb618534 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 17 Feb 2024 13:06:55 +0000 Subject: [PATCH 036/104] IAMROOT20 20240217 - Add comments to 'fixmap.h' --- arch/arm64/include/asm/fixmap.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 62f77b82e957f..ca0c21e167c9d 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -77,6 
+77,12 @@ enum fixed_addresses { * Temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. */ + + /* IAMROOT20 20240217 + * NR_FIX_BTMAPS = 0x40000 / 2^12 = 64 + * TOTAL_FIX_BTMAPS = 448 + * FIX_BTMAP_BEGIN = 523 + 447 = 970 + */ #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) #define FIX_BTMAPS_SLOTS 7 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) From 980ab56f964ba6c0f9541fbd8dd3b3bbe19a51d6 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 17 Feb 2024 13:07:12 +0000 Subject: [PATCH 037/104] IAMROOT20 20240217 - Add comments to 'setup.c' --- arch/arm64/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 19d816bc1e691..ecd97d9b21e6a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -314,9 +314,16 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) */ arm64_use_ng_mappings = kaslr_requires_kpti(); + /* + * IAMROOT20 20240217 + * fixmap으로 각 페이지 테이블의 물리 주소에 접근할 수 있게 된다. + * head.S에서 이미 한 번 호출이 되었기 때문에, 아래 호출에선 페이지 테이블 주소가 + * 잘 설정되어 있는지만 확인하는 것 같다. + */ early_fixmap_init(); early_ioremap_init(); + /* IAMROOT20_END 20240217 */ setup_machine_fdt(__fdt_pointer); /* From ec2c8aa7adf91cc0cad58eaa105b467d73f73492 Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Wed, 21 Feb 2024 13:59:48 +0900 Subject: [PATCH 038/104] =?UTF-8?q?=5F=5Fset=5Ffixmap=20=EC=A3=BC=EC=84=9D?= =?UTF-8?q?=EC=88=98=EC=A0=95.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arch/arm64/mm/fixmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index 1eb715337667e..e3b4a755861bc 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -146,7 +146,7 @@ void __set_fixmap(enum fixed_addresses idx, if (pgprot_val(flags)) { /* IAMROOT20 20231216 - * FIX_P*D의 가상주소(ptep)에 bm_p*d(phys)를 매핑한다. + * phys주소를 bm_pte[][]에 쓴다(매핑한다).
*/ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); } else { From 91835eb9d1aa32c915a42d21580f75843387ba75 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 24 Feb 2024 22:00:28 +0900 Subject: [PATCH 039/104] IAMROOT20 20240224 Add comments --- mm/early_ioremap.c | 15 ++++++++++++++- mm/memblock.c | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index 9bc12e526ed0b..2d702a3caa07d 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -108,7 +108,10 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) int i, slot; WARN_ON(system_state >= SYSTEM_RUNNING); - + + /* IAMROOT_20240224_START + * slot - 사용하지 않은 slot을 찾는다 + */ slot = -1; for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { if (!prev_map[i]) { @@ -130,6 +133,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) /* * Mappings have to be page-aligned */ + /* IAMROOT_20240224 + * mapping 할 영역을 page-aligned을 맞춘다. + * - phys_addr : 시작주소 -> page aligned-down + * - size : page aligned된 크기 (>= 1 page size) + */ offset = offset_in_page(phys_addr); phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr + 1) - phys_addr; @@ -146,6 +154,11 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) */ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { + /* IAMROOT_20240224 + * 정규 페이징 이후에는 early_ioremap() api를 사용하지 않음 + * - x86, arm64는 정규 페이징 이후에도 early_ioremap() api를 + * 사용할 수 있음 + */ if (after_paging_init) __late_set_fixmap(idx, phys_addr, prot); else diff --git a/mm/memblock.c b/mm/memblock.c index 2c85f06e55c07..248aea6d051c2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -106,6 +106,10 @@ unsigned long min_low_pfn; unsigned long max_pfn; unsigned long long max_possible_pfn; +/* IAMROOT20 20240224 + * INIT_MEMBLOCK_MEMORY_REGIONS = 128 * 8 + * INIT_MEMBLOCK_RESERVED_REGIONS = 128 + 256 + 1 + */ static struct memblock_region 
memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP From e1f3c7132385a1b90031ac919fbecd9647cb2517 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 24 Feb 2024 13:04:19 +0000 Subject: [PATCH 040/104] IAMROOT20 20240224 - Add comments to 'fdt.c' --- drivers/of/fdt.c | 4 ++++ scripts/dtc/libfdt/fdt.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index bf502ba8da958..6f4a24032b4ce 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1071,6 +1071,7 @@ int __init early_init_dt_scan_root(void) dt_root_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; dt_root_addr_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT; + /* IAMROOT20_END 20240224 */ prop = of_get_flat_dt_prop(node, "#size-cells", NULL); if (prop) dt_root_size_cells = be32_to_cpup(prop); @@ -1281,6 +1282,9 @@ bool __init early_init_dt_verify(void *params) /* Setup flat device-tree pointer */ initial_boot_params = params; + /* IAMROOT20 20240224 + * crc32 알고리즘을 사용하여 이후 fdt에 변경 사항이 생겼는지를 확인한다. + */ of_fdt_crc32 = crc32_be(~0, initial_boot_params, fdt_totalsize(initial_boot_params)); return true; diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c index 20c6415b9ced1..1f2f0671f8a7c 100644 --- a/scripts/dtc/libfdt/fdt.c +++ b/scripts/dtc/libfdt/fdt.c @@ -96,6 +96,10 @@ int fdt_check_header(const void *fdt) if (fdt_magic(fdt) != FDT_MAGIC) return -FDT_ERR_BADMAGIC; + /* IAMROOT20 20240224 + * can_assume : DTB에 운용자가 값을 설정하지 않는 경우 false, + * 그렇지 않은 경우에는 true를 반환한다. 
+ */ if (!can_assume(LATEST)) { if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) || (fdt_last_comp_version(fdt) > From 40be2fa729aa72c77d614e52840ba8dfa0d2c934 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 2 Mar 2024 20:48:22 +0900 Subject: [PATCH 041/104] IAMROOT20 20240302 Add comments --- drivers/of/fdt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 6f4a24032b4ce..05e138bbda6f7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1110,7 +1110,10 @@ int __init early_init_dt_scan_memory(void) /* We are scanning "memory" nodes only */ if (type == NULL || strcmp(type, "memory") != 0) continue; - + + /* IAMROOT20_20240302 START + * status property가 없거나 "ok","okay" 인지 확인 + */ if (!of_fdt_device_is_available(fdt, node)) continue; @@ -1228,10 +1231,17 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) return; } + /* IAMROOT20_20240302 + * base를 PAGE_SIZE 만큼 올림하고, + * size는 (PAGE_SIZE - base offset)만큼 줄인다 + */ if (!PAGE_ALIGNED(base)) { size -= PAGE_SIZE - (base & ~PAGE_MASK); base = PAGE_ALIGN(base); } + /* IAMROOT20_20240302 + * size를 PAGE_SIZE 만큼 내림한다 + */ size &= PAGE_MASK; if (base > MAX_MEMBLOCK_ADDR) { From 627f7b4b23d279b7bd4b06f00fc273c74251843f Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sun, 3 Mar 2024 21:02:33 +0900 Subject: [PATCH 042/104] IAMROOT20 20240302 Add comments to setup_fdt_machine, memblock --- arch/arm64/kernel/setup.c | 9 +++++++++ mm/memblock.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index ecd97d9b21e6a..df91916838db1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -214,14 +214,23 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } + /* IAMROOT20_20240302 + * 가상 메모리의 fdt 영역을 read-only로 remapping + */ /* Early fixups are done, map the FDT as read-only now */ fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + 
/* IAMROOT20_20240302 + * dtb root 노드에서 model 프로퍼티의 문자열을 name에 가져옴 + */ name = of_flat_dt_get_machine_name(); if (!name) return; pr_info("Machine model: %s\n", name); + /* IAMROOT20_20240302 + * 디버깅 로그를 위해 dump_stack_arch_desc_str 전역 변수에 앞에서 가져온 name을 저장 해놈 + */ dump_stack_set_arch_desc("%s (DT)", name); } diff --git a/mm/memblock.c b/mm/memblock.c index 248aea6d051c2..67aac85dbfe55 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -723,6 +723,9 @@ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, * Return: * 0 on success, -errno on failure. */ +/* IAMROOT20_20240302 + * memblock.memory에 base부터 end까지 region을 등록 + */ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; @@ -867,6 +870,10 @@ int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.reserved, base, size); } +/* IAMROOT20_20240302 + * memblock.reserved에 base부터 end까지 region을 등록 + */ +/* IAMROOT20_END 20240302 */ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; From c5fcc2896d43ca7b2a90aa5fe399092816cb525e Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 9 Mar 2024 22:39:28 +0900 Subject: [PATCH 043/104] IAMROOT20 20240309 Add comments to memblock --- 1 | 2222 +++++++++++++++++++++++++++++++++++++++++++++++++ mm/memblock.c | 72 +- 2 files changed, 2293 insertions(+), 1 deletion(-) create mode 100644 1 diff --git a/1 b/1 new file mode 100644 index 0000000000000..1dfd416f16e1a --- /dev/null +++ b/1 @@ -0,0 +1,2222 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Procedures for maintaining information about logical memory blocks. + * + * Peter Bergner, IBM Corp. June 2001. + * Copyright (C) 2001 Peter Bergner. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "internal.h" + +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_PHYSMEM_REGIONS 4 + +#ifndef INIT_MEMBLOCK_RESERVED_REGIONS +#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#endif + +#ifndef INIT_MEMBLOCK_MEMORY_REGIONS +#define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS +#endif + +/** + * DOC: memblock overview + * + * Memblock is a method of managing memory regions during the early + * boot period when the usual kernel memory allocators are not up and + * running. + * + * Memblock views the system memory as collections of contiguous + * regions. There are several types of these collections: + * + * * ``memory`` - describes the physical memory available to the + * kernel; this may differ from the actual physical memory installed + * in the system, for instance when the memory is restricted with + * ``mem=`` command line parameter + * * ``reserved`` - describes the regions that were allocated + * * ``physmem`` - describes the actual physical memory available during + * boot regardless of the possible restrictions and memory hot(un)plug; + * the ``physmem`` type is only available on some architectures. + * + * Each region is represented by struct memblock_region that + * defines the region extents, its attributes and NUMA node id on NUMA + * systems. Every memory type is described by the struct memblock_type + * which contains an array of memory regions along with + * the allocator metadata. The "memory" and "reserved" types are nicely + * wrapped with struct memblock. This structure is statically + * initialized at build time. The region arrays are initially sized to + * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and + * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array + * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS. 
+ * The memblock_allow_resize() enables automatic resizing of the region + * arrays during addition of new regions. This feature should be used + * with care so that memory allocated for the region array will not + * overlap with areas that should be reserved, for example initrd. + * + * The early architecture setup should tell memblock what the physical + * memory layout is by using memblock_add() or memblock_add_node() + * functions. The first function does not assign the region to a NUMA + * node and it is appropriate for UMA systems. Yet, it is possible to + * use it on NUMA systems as well and assign the region to a NUMA node + * later in the setup process using memblock_set_node(). The + * memblock_add_node() performs such an assignment directly. + * + * Once memblock is setup the memory can be allocated using one of the + * API variants: + * + * * memblock_phys_alloc*() - these functions return the **physical** + * address of the allocated memory + * * memblock_alloc*() - these functions return the **virtual** address + * of the allocated memory. + * + * Note, that both API variants use implicit assumptions about allowed + * memory ranges and the fallback methods. Consult the documentation + * of memblock_alloc_internal() and memblock_alloc_range_nid() + * functions for more elaborate description. + * + * As the system boot progresses, the architecture specific mem_init() + * function frees all the memory to the buddy page allocator. + * + * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the + * memblock data structures (except "physmem") will be discarded after the + * system initialization completes. 
+ */ + +#ifndef CONFIG_NUMA +struct pglist_data __refdata contig_page_data; +EXPORT_SYMBOL(contig_page_data); +#endif + +unsigned long max_low_pfn; +unsigned long min_low_pfn; +unsigned long max_pfn; +unsigned long long max_possible_pfn; + +/* IAMROOT20 20240224 + * INIT_MEMBLOCK_MEMORY_REGIONS = 128 * 8 + * INIT_MEMBLOCK_RESERVED_REGIONS = 128 + 256 + 1 + */ +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS]; +#endif + +struct memblock memblock __initdata_memblock = { + .memory.regions = memblock_memory_init_regions, + .memory.cnt = 1, /* empty dummy entry */ + .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS, + .memory.name = "memory", + + .reserved.regions = memblock_reserved_init_regions, + .reserved.cnt = 1, /* empty dummy entry */ + .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, + .reserved.name = "reserved", + + .bottom_up = false, + .current_limit = MEMBLOCK_ALLOC_ANYWHERE, +}; + +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +struct memblock_type physmem = { + .regions = memblock_physmem_init_regions, + .cnt = 1, /* empty dummy entry */ + .max = INIT_PHYSMEM_REGIONS, + .name = "physmem", +}; +#endif + +/* + * keep a pointer to &memblock.memory in the text section to use it in + * __next_mem_range() and its helpers. + * For architectures that do not keep memblock data after init, this + * pointer will be reset to NULL at memblock_discard() + */ +static __refdata struct memblock_type *memblock_memory = &memblock.memory; +#define for_each_memblock_type(i, memblock_type, rgn) \ + for (i = 0, rgn = &memblock_type->regions[0]; \ + i < memblock_type->cnt; \ + i++, rgn = &memblock_type->regions[i]) + +#define memblock_dbg(fmt, ...) 
\ + do { \ + if (memblock_debug) \ + pr_info(fmt, ##__VA_ARGS__); \ + } while (0) + +static int memblock_debug __initdata_memblock; +static bool system_has_some_mirror __initdata_memblock = false; +static int memblock_can_resize __initdata_memblock; +static int memblock_memory_in_slab __initdata_memblock = 0; +static int memblock_reserved_in_slab __initdata_memblock = 0; + +static enum memblock_flags __init_memblock choose_memblock_flags(void) +{ + return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE; +} + +/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +/* IAMROOT20 20240309 + * ex) PHYS_ADDR_MAX = 0xffff_ffff_ffff_ffff + base = 0xffff_ffff_ffff_fff0 + size = 0x100 + return size = min(0x100, 0xf) = 0xf; + (base + size)가 overflow가 발생하면 (PHYS_ADDR_MAX - base)를 size로 재설정 + */ +static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) +{ + return *size = min(*size, PHYS_ADDR_MAX - base); +} + +/* + * Address comparison utilities + */ +static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2) +{ + return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); +} + +bool __init_memblock memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) +{ + unsigned long i; + + memblock_cap_size(base, &size); + + for (i = 0; i < type->cnt; i++) + if (memblock_addrs_overlap(base, size, type->regions[i].base, + type->regions[i].size)) + break; + return i < type->cnt; +} + +/** + * __memblock_find_range_bottom_up - find free area utility in bottom-up + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or + * %MEMBLOCK_ALLOC_ACCESSIBLE + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * @flags: pick from blocks based on memory attributes + * + * Utility 
called from memblock_find_in_range_node(), find free area bottom-up. + * + * Return: + * Found address on success, 0 on failure. + */ +static phys_addr_t __init_memblock +__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid, + enum memblock_flags flags) +{ + phys_addr_t this_start, this_end, cand; + u64 i; + + for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); + + cand = round_up(this_start, align); + if (cand < this_end && this_end - cand >= size) + return cand; + } + + return 0; +} + +/** + * __memblock_find_range_top_down - find free area utility, in top-down + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or + * %MEMBLOCK_ALLOC_ACCESSIBLE + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * @flags: pick from blocks based on memory attributes + * + * Utility called from memblock_find_in_range_node(), find free area top-down. + * + * Return: + * Found address on success, 0 on failure. 
+ */ +static phys_addr_t __init_memblock +__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid, + enum memblock_flags flags) +{ + phys_addr_t this_start, this_end, cand; + u64 i; + + for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end, + NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); + + if (this_end < size) + continue; + + cand = round_down(this_end - size, align); + if (cand >= this_start) + return cand; + } + + return 0; +} + +/** + * memblock_find_in_range_node - find free area in given range and node + * @size: size of free area to find + * @align: alignment of free area to find + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or + * %MEMBLOCK_ALLOC_ACCESSIBLE + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * @flags: pick from blocks based on memory attributes + * + * Find @size free area aligned to @align in the specified range and node. + * + * Return: + * Found address on success, 0 on failure. 
+ */ +static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, + enum memblock_flags flags) +{ + /* pump up @end */ + if (end == MEMBLOCK_ALLOC_ACCESSIBLE || + end == MEMBLOCK_ALLOC_NOLEAKTRACE) + end = memblock.current_limit; + + /* avoid allocating the first page */ + start = max_t(phys_addr_t, start, PAGE_SIZE); + end = max(start, end); + + if (memblock_bottom_up()) + return __memblock_find_range_bottom_up(start, end, size, align, + nid, flags); + else + return __memblock_find_range_top_down(start, end, size, align, + nid, flags); +} + +/** + * memblock_find_in_range - find free area in given range + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or + * %MEMBLOCK_ALLOC_ACCESSIBLE + * @size: size of free area to find + * @align: alignment of free area to find + * + * Find @size free area aligned to @align in the specified range. + * + * Return: + * Found address on success, 0 on failure. 
+ */ +static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, + phys_addr_t end, phys_addr_t size, + phys_addr_t align) +{ + phys_addr_t ret; + enum memblock_flags flags = choose_memblock_flags(); + +again: + ret = memblock_find_in_range_node(size, align, start, end, + NUMA_NO_NODE, flags); + + if (!ret && (flags & MEMBLOCK_MIRROR)) { + pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", + &size); + flags &= ~MEMBLOCK_MIRROR; + goto again; + } + + return ret; +} + +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) +{ + type->total_size -= type->regions[r].size; + memmove(&type->regions[r], &type->regions[r + 1], + (type->cnt - (r + 1)) * sizeof(type->regions[r])); + type->cnt--; + + /* Special case for empty arrays */ + if (type->cnt == 0) { + WARN_ON(type->total_size != 0); + type->cnt = 1; + type->regions[0].base = 0; + type->regions[0].size = 0; + type->regions[0].flags = 0; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); + } +} + +#ifndef CONFIG_ARCH_KEEP_MEMBLOCK +/** + * memblock_discard - discard memory and reserved arrays if they were allocated + */ +void __init memblock_discard(void) +{ + phys_addr_t addr, size; + + if (memblock.reserved.regions != memblock_reserved_init_regions) { + addr = __pa(memblock.reserved.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); + if (memblock_reserved_in_slab) + kfree(memblock.reserved.regions); + else + memblock_free_late(addr, size); + } + + if (memblock.memory.regions != memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); + if (memblock_memory_in_slab) + kfree(memblock.memory.regions); + else + memblock_free_late(addr, size); + } + + memblock_memory = NULL; +} +#endif + +/** + * memblock_double_array - double the size of the memblock regions array + * @type: memblock type of the 
regions array being doubled + * @new_area_start: starting address of memory range to avoid overlap with + * @new_area_size: size of memory range to avoid overlap with + * + * Double the size of the @type regions array. If memblock is being used to + * allocate memory for a new reserved regions array and there is a previously + * allocated memory range [@new_area_start, @new_area_start + @new_area_size] + * waiting to be reserved, ensure the memory used by the new array does + * not overlap. + * + * Return: + * 0 on success, -1 on failure. + */ +static int __init_memblock memblock_double_array(struct memblock_type *type, + phys_addr_t new_area_start, + phys_addr_t new_area_size) +{ + struct memblock_region *new_array, *old_array; + phys_addr_t old_alloc_size, new_alloc_size; + phys_addr_t old_size, new_size, addr, new_end; + int use_slab = slab_is_available(); + int *in_slab; + + /* We don't allow resizing until we know about the reserved regions + * of memory that aren't suitable for allocation + */ + if (!memblock_can_resize) + return -1; + + /* Calculate new doubled size */ + old_size = type->max * sizeof(struct memblock_region); + new_size = old_size << 1; + /* + * We need to allocated new one align to PAGE_SIZE, + * so we can free them completely later. + */ + old_alloc_size = PAGE_ALIGN(old_size); + new_alloc_size = PAGE_ALIGN(new_size); + + /* Retrieve the slab flag */ + if (type == &memblock.memory) + in_slab = &memblock_memory_in_slab; + else + in_slab = &memblock_reserved_in_slab; + + /* Try to find some space for it */ + if (use_slab) { + new_array = kmalloc(new_size, GFP_KERNEL); + addr = new_array ? 
__pa(new_array) : 0; + } else { + /* only exclude range when trying to double reserved.regions */ + if (type != &memblock.reserved) + new_area_start = new_area_size = 0; + + addr = memblock_find_in_range(new_area_start + new_area_size, + memblock.current_limit, + new_alloc_size, PAGE_SIZE); + if (!addr && new_area_size) + addr = memblock_find_in_range(0, + min(new_area_start, memblock.current_limit), + new_alloc_size, PAGE_SIZE); + + new_array = addr ? __va(addr) : NULL; + } + if (!addr) { + pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", + type->name, type->max, type->max * 2); + return -1; + } + + new_end = addr + new_size - 1; + memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]", + type->name, type->max * 2, &addr, &new_end); + + /* + * Found space, we now need to move the array over before we add the + * reserved region since it may be our reserved array itself that is + * full. + */ + memcpy(new_array, type->regions, old_size); + memset(new_array + type->max, 0, old_size); + old_array = type->regions; + type->regions = new_array; + type->max <<= 1; + + /* Free old array. We needn't free it if the array is the static one */ + if (*in_slab) + kfree(old_array); + else if (old_array != memblock_memory_init_regions && + old_array != memblock_reserved_init_regions) + memblock_free(old_array, old_alloc_size); + + /* + * Reserve the new array if that comes from the memblock. 
Otherwise, we + * needn't do it + */ + if (!use_slab) + BUG_ON(memblock_reserve(addr, new_alloc_size)); + + /* Update slab flag */ + *in_slab = use_slab; + + return 0; +} + +/** + * memblock_merge_regions - merge neighboring compatible regions + * @type: memblock type to scan + * @start_rgn: start scanning from (@start_rgn - 1) + * @end_rgn: end scanning at (@end_rgn - 1) + * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn) + */ +static void __init_memblock memblock_merge_regions(struct memblock_type *type, + unsigned long start_rgn, + unsigned long end_rgn) +{ + int i = 0; + if (start_rgn) + i = start_rgn - 1; + end_rgn = min(end_rgn, type->cnt - 1); + while (i < end_rgn) { + struct memblock_region *this = &type->regions[i]; + struct memblock_region *next = &type->regions[i + 1]; + + if (this->base + this->size != next->base || + memblock_get_region_node(this) != + memblock_get_region_node(next) || + this->flags != next->flags) { + BUG_ON(this->base + this->size > next->base); + i++; + continue; + } + + this->size += next->size; + /* move forward from next + 1, index of which is i + 2 */ + memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); + type->cnt--; + end_rgn--; + } +} + +/** + * memblock_insert_region - insert new memblock region + * @type: memblock type to insert into + * @idx: index for the insertion point + * @base: base address of the new region + * @size: size of the new region + * @nid: node id of the new region + * @flags: flags of the new region + * + * Insert new memblock region [@base, @base + @size) into @type at @idx. + * @type must already have extra room to accommodate the new region. 
+ */ +static void __init_memblock memblock_insert_region(struct memblock_type *type, + int idx, phys_addr_t base, + phys_addr_t size, + int nid, + enum memblock_flags flags) +{ + struct memblock_region *rgn = &type->regions[idx]; + + BUG_ON(type->cnt >= type->max); + memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); + rgn->base = base; + rgn->size = size; + rgn->flags = flags; + memblock_set_region_node(rgn, nid); + type->cnt++; + type->total_size += size; +} + +/** + * memblock_add_range - add new memblock region + * @type: memblock type to add new region into + * @base: base address of the new region + * @size: size of the new region + * @nid: nid of the new region + * @flags: flags of the new region + * + * Add new memblock region [@base, @base + @size) into @type. The new region + * is allowed to overlap with existing ones - overlaps don't affect already + * existing regions. @type is guaranteed to be minimal (all neighbouring + * compatible regions are merged) after the addition. + * + * Return: + * 0 on success, -errno on failure. + */ +/* IAMROOT20_START 20240309 */ +static int __init_memblock memblock_add_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size, + int nid, enum memblock_flags flags) +{ + bool insert = false; + phys_addr_t obase = base; + phys_addr_t end = base + memblock_cap_size(base, &size); + int idx, nr_new, start_rgn = -1, end_rgn; + struct memblock_region *rgn; + + if (!size) + return 0; + + /* special case for empty array */ + /* IAMROOT20 20240309 + * type에 등록된 regions이 없을 경우 + * new region을 등록 후 함수 종료 + */ + if (type->regions[0].size == 0) { + WARN_ON(type->cnt != 1 || type->total_size); + type->regions[0].base = base; + type->regions[0].size = size; + type->regions[0].flags = flags; + memblock_set_region_node(&type->regions[0], nid); + type->total_size = size; + return 0; + } + + /* + * The worst case is when new range overlaps all existing regions, + * then we'll need type->cnt + 1 empty regions in @type. 
So if + * type->cnt * 2 + 1 is less than or equal to type->max, we know + * that there is enough empty regions in @type, and we can insert + * regions directly. + */ + /* IAMROOT20 20240309 + * + */ + if (type->cnt * 2 + 1 <= type->max) + insert = true; + +repeat: + /* + * The following is executed twice. Once with %false @insert and + * then with %true. The first counts the number of regions needed + * to accommodate the new area. The second actually inserts them. + */ + base = obase; + nr_new = 0; + + for_each_memblock_type(idx, type, rgn) { + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; + + if (rbase >= end) + break; + if (rend <= base) + continue; + /* + * @rgn overlaps. If it separates the lower part of new + * area, insert that portion. + */ + if (rbase > base) { +#ifdef CONFIG_NUMA + WARN_ON(nid != memblock_get_region_node(rgn)); +#endif + WARN_ON(flags != rgn->flags); + nr_new++; + if (insert) { + if (start_rgn == -1) + start_rgn = idx; + end_rgn = idx + 1; + memblock_insert_region(type, idx++, base, + rbase - base, nid, + flags); + } + } + /* area below @rend is dealt with, forget about it */ + base = min(rend, end); + } + + /* insert the remaining portion */ + if (base < end) { + nr_new++; + if (insert) { + if (start_rgn == -1) + start_rgn = idx; + end_rgn = idx + 1; + memblock_insert_region(type, idx, base, end - base, + nid, flags); + } + } + + if (!nr_new) + return 0; + + /* + * If this was the first round, resize array and repeat for actual + * insertions; otherwise, merge and return. 
+ */ + if (!insert) { + while (type->cnt + nr_new > type->max) + if (memblock_double_array(type, obase, size) < 0) + return -ENOMEM; + insert = true; + goto repeat; + } else { + memblock_merge_regions(type, start_rgn, end_rgn); + return 0; + } +} + +/** + * memblock_add_node - add new memblock region within a NUMA node + * @base: base address of the new region + * @size: size of the new region + * @nid: nid of the new region + * @flags: flags of the new region + * + * Add new memblock region [@base, @base + @size) to the "memory" + * type. See memblock_add_range() description for mode details + * + * Return: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, + int nid, enum memblock_flags flags) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, + &base, &end, nid, flags, (void *)_RET_IP_); + + return memblock_add_range(&memblock.memory, base, size, nid, flags); +} + +/** + * memblock_add - add new memblock region + * @base: base address of the new region + * @size: size of the new region + * + * Add new memblock region [@base, @base + @size) to the "memory" + * type. See memblock_add_range() description for mode details + * + * Return: + * 0 on success, -errno on failure. 
+ */ +/* IAMROOT20_20240302 + * memblock.memory에 base부터 end까지 region을 등록 + */ +int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, + &base, &end, (void *)_RET_IP_); + + return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); +} + +/** + * memblock_isolate_range - isolate given range into disjoint memblocks + * @type: memblock type to isolate range for + * @base: base of range to isolate + * @size: size of range to isolate + * @start_rgn: out parameter for the start of isolated region + * @end_rgn: out parameter for the end of isolated region + * + * Walk @type and ensure that regions don't cross the boundaries defined by + * [@base, @base + @size). Crossing regions are split at the boundaries, + * which may create at most two more regions. The index of the first + * region inside the range is returned in *@start_rgn and end in *@end_rgn. + * + * Return: + * 0 on success, -errno on failure. + */ +static int __init_memblock memblock_isolate_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size, + int *start_rgn, int *end_rgn) +{ + phys_addr_t end = base + memblock_cap_size(base, &size); + int idx; + struct memblock_region *rgn; + + *start_rgn = *end_rgn = 0; + + if (!size) + return 0; + + /* we'll create at most two more regions */ + while (type->cnt + 2 > type->max) + if (memblock_double_array(type, base, size) < 0) + return -ENOMEM; + + for_each_memblock_type(idx, type, rgn) { + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; + + if (rbase >= end) + break; + if (rend <= base) + continue; + + if (rbase < base) { + /* + * @rgn intersects from below. Split and continue + * to process the next region - the new top half. 
+ */ + rgn->base = base; + rgn->size -= base - rbase; + type->total_size -= base - rbase; + memblock_insert_region(type, idx, rbase, base - rbase, + memblock_get_region_node(rgn), + rgn->flags); + } else if (rend > end) { + /* + * @rgn intersects from above. Split and redo the + * current region - the new bottom half. + */ + rgn->base = end; + rgn->size -= end - rbase; + type->total_size -= end - rbase; + memblock_insert_region(type, idx--, rbase, end - rbase, + memblock_get_region_node(rgn), + rgn->flags); + } else { + /* @rgn is fully contained, record it */ + if (!*end_rgn) + *start_rgn = idx; + *end_rgn = idx + 1; + } + } + + return 0; +} + +static int __init_memblock memblock_remove_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) +{ + int start_rgn, end_rgn; + int i, ret; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; + + for (i = end_rgn - 1; i >= start_rgn; i--) + memblock_remove_region(type, i); + return 0; +} + +int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, + &base, &end, (void *)_RET_IP_); + + return memblock_remove_range(&memblock.memory, base, size); +} + +/** + * memblock_free - free boot memory allocation + * @ptr: starting address of the boot memory allocation + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_alloc_xx() API. + * The freeing memory will not be released to the buddy allocator. + */ +void __init_memblock memblock_free(void *ptr, size_t size) +{ + if (ptr) + memblock_phys_free(__pa(ptr), size); +} + +/** + * memblock_phys_free - free boot memory block + * @base: phys starting address of the boot memory block + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_phys_alloc_xx() API. 
+ * The freeing memory will not be released to the buddy allocator. + */ +int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, + &base, &end, (void *)_RET_IP_); + + kmemleak_free_part_phys(base, size); + return memblock_remove_range(&memblock.reserved, base, size); +} + +/* IAMROOT20_20240302 + * memblock.reserved에 base부터 end까지 region을 등록 + */ +/* IAMROOT20_END 20240302 */ +int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, + &base, &end, (void *)_RET_IP_); + + return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); +} + +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t end = base + size - 1; + + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, + &base, &end, (void *)_RET_IP_); + + return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0); +} +#endif + +/** + * memblock_setclr_flag - set or clear flag for a memory region + * @base: base address of the region + * @size: size of the region + * @set: set or clear the flag + * @flag: the flag to update + * + * This function isolates region [@base, @base + @size), and sets/clears flag + * + * Return: 0 on success, -errno on failure. 
+ */ +static int __init_memblock memblock_setclr_flag(phys_addr_t base, + phys_addr_t size, int set, int flag) +{ + struct memblock_type *type = &memblock.memory; + int i, ret, start_rgn, end_rgn; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; + + for (i = start_rgn; i < end_rgn; i++) { + struct memblock_region *r = &type->regions[i]; + + if (set) + r->flags |= flag; + else + r->flags &= ~flag; + } + + memblock_merge_regions(type, start_rgn, end_rgn); + return 0; +} + +/** + * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. + * @base: the base phys addr of the region + * @size: the size of the region + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG); +} + +/** + * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(base, size, 0, MEMBLOCK_HOTPLUG); +} + +/** + * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR. + * @base: the base phys addr of the region + * @size: the size of the region + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) +{ + if (!mirrored_kernelcore) + return 0; + + system_has_some_mirror = true; + + return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR); +} + +/** + * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP. + * @base: the base phys addr of the region + * @size: the size of the region + * + * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the + * direct mapping of the physical memory. 
These regions will still be + * covered by the memory map. The struct page representing NOMAP memory + * frames in the memory map will be PageReserved() + * + * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from + * memblock, the caller must inform kmemleak to ignore that memory + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); +} + +/** + * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * Return: 0 on success, -errno on failure. + */ +int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP); +} + +static bool should_skip_region(struct memblock_type *type, + struct memblock_region *m, + int nid, int flags) +{ + int m_nid = memblock_get_region_node(m); + + /* we never skip regions when iterating memblock.reserved or physmem */ + if (type != memblock_memory) + return false; + + /* only memory regions are associated with nodes, check it */ + if (nid != NUMA_NO_NODE && nid != m_nid) + return true; + + /* skip hotpluggable memory regions if needed */ + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) + return true; + + /* if we want mirror memory skip non-mirror memory regions */ + if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) + return true; + + /* skip nomap memory unless we were asked for it explicitly */ + if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) + return true; + + /* skip driver-managed memory unless we were asked for it explicitly */ + if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) + return true; + + return false; +} + +/** + * __next_mem_range - next function for for_each_free_mem_range() etc. 
+ * @idx: pointer to u64 loop variable + * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Find the first area from *@idx which matches @nid, fill the out + * parameters, and update *@idx for the next iteration. The lower 32bit of + * *@idx contains index into type_a and the upper 32bit indexes the + * areas before each region in type_b. For example, if type_b regions + * look like the following, + * + * 0:[0-16), 1:[32-48), 2:[128-130) + * + * The upper 32bit indexes the following regions. + * + * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) + * + * As both region arrays are sorted, the function advances the two indices + * in lockstep and returns each intersection. + */ +void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, + struct memblock_type *type_a, + struct memblock_type *type_b, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) +{ + int idx_a = *idx & 0xffffffff; + int idx_b = *idx >> 32; + + if (WARN_ONCE(nid == MAX_NUMNODES, + "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + + for (; idx_a < type_a->cnt; idx_a++) { + struct memblock_region *m = &type_a->regions[idx_a]; + + phys_addr_t m_start = m->base; + phys_addr_t m_end = m->base + m->size; + int m_nid = memblock_get_region_node(m); + + if (should_skip_region(type_a, m, nid, flags)) + continue; + + if (!type_b) { + if (out_start) + *out_start = m_start; + if (out_end) + *out_end = m_end; + if (out_nid) + *out_nid = m_nid; + idx_a++; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + + /* scan areas before each reservation */ + for (; idx_b < type_b->cnt + 1; idx_b++) { + struct memblock_region *r; + phys_addr_t r_start; + phys_addr_t r_end; + + r = &type_b->regions[idx_b]; + r_start = idx_b ? r[-1].base + r[-1].size : 0; + r_end = idx_b < type_b->cnt ? + r->base : PHYS_ADDR_MAX; + + /* + * if idx_b advanced past idx_a, + * break out to advance idx_a + */ + if (r_start >= m_end) + break; + /* if the two regions intersect, we're done */ + if (m_start < r_end) { + if (out_start) + *out_start = + max(m_start, r_start); + if (out_end) + *out_end = min(m_end, r_end); + if (out_nid) + *out_nid = m_nid; + /* + * The region which ends first is + * advanced for the next iteration. 
+ */ + if (m_end <= r_end) + idx_a++; + else + idx_b++; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + } + } + + /* signal end of iteration */ + *idx = ULLONG_MAX; +} + +/** + * __next_mem_range_rev - generic next function for for_each_*_range_rev() + * + * @idx: pointer to u64 loop variable + * @nid: node selector, %NUMA_NO_NODE for all nodes + * @flags: pick from blocks based on memory attributes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Finds the next range from type_a which is not marked as unsuitable + * in type_b. + * + * Reverse of __next_mem_range(). + */ +void __init_memblock __next_mem_range_rev(u64 *idx, int nid, + enum memblock_flags flags, + struct memblock_type *type_a, + struct memblock_type *type_b, + phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) +{ + int idx_a = *idx & 0xffffffff; + int idx_b = *idx >> 32; + + if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + + if (*idx == (u64)ULLONG_MAX) { + idx_a = type_a->cnt - 1; + if (type_b != NULL) + idx_b = type_b->cnt; + else + idx_b = 0; + } + + for (; idx_a >= 0; idx_a--) { + struct memblock_region *m = &type_a->regions[idx_a]; + + phys_addr_t m_start = m->base; + phys_addr_t m_end = m->base + m->size; + int m_nid = memblock_get_region_node(m); + + if (should_skip_region(type_a, m, nid, flags)) + continue; + + if (!type_b) { + if (out_start) + *out_start = m_start; + if (out_end) + *out_end = m_end; + if (out_nid) + *out_nid = m_nid; + idx_a--; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + + /* scan areas before each reservation */ + for (; idx_b >= 0; idx_b--) { + struct memblock_region *r; + phys_addr_t r_start; + phys_addr_t r_end; + + r = &type_b->regions[idx_b]; + r_start = idx_b ? r[-1].base + r[-1].size : 0; + r_end = idx_b < type_b->cnt ? + r->base : PHYS_ADDR_MAX; + /* + * if idx_b advanced past idx_a, + * break out to advance idx_a + */ + + if (r_end <= m_start) + break; + /* if the two regions intersect, we're done */ + if (m_end > r_start) { + if (out_start) + *out_start = max(m_start, r_start); + if (out_end) + *out_end = min(m_end, r_end); + if (out_nid) + *out_nid = m_nid; + if (m_start >= r_start) + idx_a--; + else + idx_b--; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + } + } + /* signal end of iteration */ + *idx = ULLONG_MAX; +} + +/* + * Common iterator interface used to define for_each_mem_pfn_range(). 
+ */ +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + struct memblock_type *type = &memblock.memory; + struct memblock_region *r; + int r_nid; + + while (++*idx < type->cnt) { + r = &type->regions[*idx]; + r_nid = memblock_get_region_node(r); + + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) + continue; + if (nid == MAX_NUMNODES || nid == r_nid) + break; + } + if (*idx >= type->cnt) { + *idx = -1; + return; + } + + if (out_start_pfn) + *out_start_pfn = PFN_UP(r->base); + if (out_end_pfn) + *out_end_pfn = PFN_DOWN(r->base + r->size); + if (out_nid) + *out_nid = r_nid; +} + +/** + * memblock_set_node - set node ID on memblock regions + * @base: base of area to set node ID for + * @size: size of area to set node ID for + * @type: memblock type to set node ID for + * @nid: node ID to set + * + * Set the nid of memblock @type regions in [@base, @base + @size) to @nid. + * Regions which cross the area boundaries are split as necessary. + * + * Return: + * 0 on success, -errno on failure. 
+ */ +int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, + struct memblock_type *type, int nid) +{ +#ifdef CONFIG_NUMA + int start_rgn, end_rgn; + int i, ret; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; + + for (i = start_rgn; i < end_rgn; i++) + memblock_set_region_node(&type->regions[i], nid); + + memblock_merge_regions(type, start_rgn, end_rgn); +#endif + return 0; +} + +#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT +/** + * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() + * + * @idx: pointer to u64 loop variable + * @zone: zone in which all of the memory blocks reside + * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL + * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL + * + * This function is meant to be a zone/pfn specific wrapper for the + * for_each_mem_range type iterators. Specifically they are used in the + * deferred memory init routines and as such we were duplicating much of + * this logic throughout the code. So instead of having it in multiple + * locations it seemed like it would make more sense to centralize this to + * one new iterator that does everything they need. + */ +void __init_memblock +__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, + unsigned long *out_spfn, unsigned long *out_epfn) +{ + int zone_nid = zone_to_nid(zone); + phys_addr_t spa, epa; + + __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, + &memblock.memory, &memblock.reserved, + &spa, &epa, NULL); + + while (*idx != U64_MAX) { + unsigned long epfn = PFN_DOWN(epa); + unsigned long spfn = PFN_UP(spa); + + /* + * Verify the end is at least past the start of the zone and + * that we have at least one PFN to initialize. 
+ */ + if (zone->zone_start_pfn < epfn && spfn < epfn) { + /* if we went too far just stop searching */ + if (zone_end_pfn(zone) <= spfn) { + *idx = U64_MAX; + break; + } + + if (out_spfn) + *out_spfn = max(zone->zone_start_pfn, spfn); + if (out_epfn) + *out_epfn = min(zone_end_pfn(zone), epfn); + + return; + } + + __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, + &memblock.memory, &memblock.reserved, + &spa, &epa, NULL); + } + + /* signal end of iteration */ + if (out_spfn) + *out_spfn = ULONG_MAX; + if (out_epfn) + *out_epfn = 0; +} + +#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ + +/** + * memblock_alloc_range_nid - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @start: the lower bound of the memory region to allocate (phys address) + * @end: the upper bound of the memory region to allocate (phys address) + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * @exact_nid: control the allocation fall back to other nodes + * + * The allocation is performed from memory region limited by + * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE. + * + * If the specified node can not hold the requested memory and @exact_nid + * is false, the allocation falls back to any node in the system. + * + * For systems with memory mirroring, the allocation is attempted first + * from the regions with mirroring enabled and then retried from any + * memory region. + * + * In addition, function using kmemleak_alloc_phys for allocated boot + * memory block, it is never reported as leaks. + * + * Return: + * Physical address of allocated memory block on success, %0 on failure. 
+ */ +phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, + bool exact_nid) +{ + enum memblock_flags flags = choose_memblock_flags(); + phys_addr_t found; + + if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + + if (!align) { + /* Can't use WARNs this early in boot on powerpc */ + dump_stack(); + align = SMP_CACHE_BYTES; + } + +again: + found = memblock_find_in_range_node(size, align, start, end, nid, + flags); + if (found && !memblock_reserve(found, size)) + goto done; + + if (nid != NUMA_NO_NODE && !exact_nid) { + found = memblock_find_in_range_node(size, align, start, + end, NUMA_NO_NODE, + flags); + if (found && !memblock_reserve(found, size)) + goto done; + } + + if (flags & MEMBLOCK_MIRROR) { + flags &= ~MEMBLOCK_MIRROR; + pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", + &size); + goto again; + } + + return 0; + +done: + /* + * Skip kmemleak for those places like kasan_init() and + * early_pgtable_alloc() due to high volume. + */ + if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) + /* + * Memblock allocated blocks are never reported as + * leaks. This is because many of these blocks are + * only referred via the physical address which is + * not looked up by kmemleak. + */ + kmemleak_alloc_phys(found, size, 0); + + return found; +} + +/** + * memblock_phys_alloc_range - allocate a memory block inside specified range + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @start: the lower bound of the memory region to allocate (physical address) + * @end: the upper bound of the memory region to allocate (physical address) + * + * Allocate @size bytes in the between @start and @end. + * + * Return: physical address of the allocated memory block on success, + * %0 on failure. 
+ */ +phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, + phys_addr_t align, + phys_addr_t start, + phys_addr_t end) +{ + memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n", + __func__, (u64)size, (u64)align, &start, &end, + (void *)_RET_IP_); + return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, + false); +} + +/** + * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Allocates memory block from the specified NUMA node. If the node + * has no available memory, attempts to allocated from any node in the + * system. + * + * Return: physical address of the allocated memory block on success, + * %0 on failure. + */ +phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + return memblock_alloc_range_nid(size, align, 0, + MEMBLOCK_ALLOC_ACCESSIBLE, nid, false); +} + +/** + * memblock_alloc_internal - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region to allocate (phys address) + * @max_addr: the upper bound of the memory region to allocate (phys address) + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * @exact_nid: control the allocation fall back to other nodes + * + * Allocates memory block using memblock_alloc_range_nid() and + * converts the returned physical address to virtual. + * + * The @min_addr limit is dropped if it can not be satisfied and the allocation + * will fall back to memory below @min_addr. Other constraints, such + * as node and mirrored memory will be handled again in + * memblock_alloc_range_nid(). + * + * Return: + * Virtual address of allocated memory block on success, NULL on failure. 
+ */ +static void * __init memblock_alloc_internal( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid, bool exact_nid) +{ + phys_addr_t alloc; + + /* + * Detect any accidental use of these APIs after slab is ready, as at + * this moment memblock may be deinitialized already and its + * internal data may be destroyed (after execution of memblock_free_all) + */ + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, nid); + + if (max_addr > memblock.current_limit) + max_addr = memblock.current_limit; + + alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid, + exact_nid); + + /* retry allocation without lower limit */ + if (!alloc && min_addr) + alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid, + exact_nid); + + if (!alloc) + return NULL; + + return phys_to_virt(alloc); +} + +/** + * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node + * without zeroing memory + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + * is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to + * allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public function, provides additional debug information (including caller + * info), if enabled. Does not zero allocated memory. + * + * Return: + * Virtual address of allocated memory block on success, NULL on failure. 
+ */ +void * __init memblock_alloc_exact_nid_raw( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) +{ + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", + __func__, (u64)size, (u64)align, nid, &min_addr, + &max_addr, (void *)_RET_IP_); + + return memblock_alloc_internal(size, align, min_addr, max_addr, nid, + true); +} + +/** + * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing + * memory and without panicking + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + * is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to + * allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public function, provides additional debug information (including caller + * info), if enabled. Does not zero allocated memory, does not panic if request + * cannot be satisfied. + * + * Return: + * Virtual address of allocated memory block on success, NULL on failure. 
+ */ +void * __init memblock_alloc_try_nid_raw( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) +{ + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", + __func__, (u64)size, (u64)align, nid, &min_addr, + &max_addr, (void *)_RET_IP_); + + return memblock_alloc_internal(size, align, min_addr, max_addr, nid, + false); +} + +/** + * memblock_alloc_try_nid - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + * is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to + * allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public function, provides additional debug information (including caller + * info), if enabled. This function zeroes the allocated memory. + * + * Return: + * Virtual address of allocated memory block on success, NULL on failure. + */ +void * __init memblock_alloc_try_nid( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) +{ + void *ptr; + + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", + __func__, (u64)size, (u64)align, nid, &min_addr, + &max_addr, (void *)_RET_IP_); + ptr = memblock_alloc_internal(size, align, + min_addr, max_addr, nid, false); + if (ptr) + memset(ptr, 0, size); + + return ptr; +} + +/** + * memblock_free_late - free pages directly to buddy allocator + * @base: phys starting address of the boot memory block + * @size: size of the boot memory block in bytes + * + * This is only useful when the memblock allocator has already been torn + * down, but we are still initializing the system. 
Pages are released directly + * to the buddy allocator. + */ +void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + phys_addr_t cursor, end; + + end = base + size - 1; + memblock_dbg("%s: [%pa-%pa] %pS\n", + __func__, &base, &end, (void *)_RET_IP_); + kmemleak_free_part_phys(base, size); + cursor = PFN_UP(base); + end = PFN_DOWN(base + size); + + for (; cursor < end; cursor++) { + memblock_free_pages(pfn_to_page(cursor), cursor, 0); + totalram_pages_inc(); + } +} + +/* + * Remaining API functions + */ + +phys_addr_t __init_memblock memblock_phys_mem_size(void) +{ + return memblock.memory.total_size; +} + +phys_addr_t __init_memblock memblock_reserved_size(void) +{ + return memblock.reserved.total_size; +} + +/* lowest address */ +phys_addr_t __init_memblock memblock_start_of_DRAM(void) +{ + return memblock.memory.regions[0].base; +} + +phys_addr_t __init_memblock memblock_end_of_DRAM(void) +{ + int idx = memblock.memory.cnt - 1; + + return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); +} + +static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) +{ + phys_addr_t max_addr = PHYS_ADDR_MAX; + struct memblock_region *r; + + /* + * translate the memory @limit size into the max address within one of + * the memory memblock regions, if the @limit exceeds the total size + * of those regions, max_addr will keep original value PHYS_ADDR_MAX + */ + for_each_mem_region(r) { + if (limit <= r->size) { + max_addr = r->base + limit; + break; + } + limit -= r->size; + } + + return max_addr; +} + +void __init memblock_enforce_memory_limit(phys_addr_t limit) +{ + phys_addr_t max_addr; + + if (!limit) + return; + + max_addr = __find_max_addr(limit); + + /* @limit exceeds the total size of the memory, do nothing */ + if (max_addr == PHYS_ADDR_MAX) + return; + + /* truncate both memory and reserved regions */ + memblock_remove_range(&memblock.memory, max_addr, + PHYS_ADDR_MAX); + memblock_remove_range(&memblock.reserved, 
max_addr, + PHYS_ADDR_MAX); +} + +void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) +{ + int start_rgn, end_rgn; + int i, ret; + + if (!size) + return; + + if (!memblock_memory->total_size) { + pr_warn("%s: No memory registered yet\n", __func__); + return; + } + + ret = memblock_isolate_range(&memblock.memory, base, size, + &start_rgn, &end_rgn); + if (ret) + return; + + /* remove all the MAP regions */ + for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) + if (!memblock_is_nomap(&memblock.memory.regions[i])) + memblock_remove_region(&memblock.memory, i); + + for (i = start_rgn - 1; i >= 0; i--) + if (!memblock_is_nomap(&memblock.memory.regions[i])) + memblock_remove_region(&memblock.memory, i); + + /* truncate the reserved regions */ + memblock_remove_range(&memblock.reserved, 0, base); + memblock_remove_range(&memblock.reserved, + base + size, PHYS_ADDR_MAX); +} + +void __init memblock_mem_limit_remove_map(phys_addr_t limit) +{ + phys_addr_t max_addr; + + if (!limit) + return; + + max_addr = __find_max_addr(limit); + + /* @limit exceeds the total size of the memory, do nothing */ + if (max_addr == PHYS_ADDR_MAX) + return; + + memblock_cap_memory_range(0, max_addr); +} + +static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) +{ + unsigned int left = 0, right = type->cnt; + + do { + unsigned int mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else + return mid; + } while (left < right); + return -1; +} + +bool __init_memblock memblock_is_reserved(phys_addr_t addr) +{ + return memblock_search(&memblock.reserved, addr) != -1; +} + +bool __init_memblock memblock_is_memory(phys_addr_t addr) +{ + return memblock_search(&memblock.memory, addr) != -1; +} + +bool __init_memblock memblock_is_map_memory(phys_addr_t addr) +{ + int i = memblock_search(&memblock.memory, addr); + + if (i == -1) 
+ return false; + return !memblock_is_nomap(&memblock.memory.regions[i]); +} + +int __init_memblock memblock_search_pfn_nid(unsigned long pfn, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + struct memblock_type *type = &memblock.memory; + int mid = memblock_search(type, PFN_PHYS(pfn)); + + if (mid == -1) + return -1; + + *start_pfn = PFN_DOWN(type->regions[mid].base); + *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); + + return memblock_get_region_node(&type->regions[mid]); +} + +/** + * memblock_is_region_memory - check if a region is a subset of memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base + @size) is a subset of a memory block. + * + * Return: + * 0 if false, non-zero if true + */ +bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) +{ + int idx = memblock_search(&memblock.memory, base); + phys_addr_t end = base + memblock_cap_size(base, &size); + + if (idx == -1) + return false; + return (memblock.memory.regions[idx].base + + memblock.memory.regions[idx].size) >= end; +} + +/** + * memblock_is_region_reserved - check if a region intersects reserved memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base + @size) intersects a reserved + * memory block. + * + * Return: + * True if they intersect, false if not. 
+ */ +bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) +{ + return memblock_overlaps_region(&memblock.reserved, base, size); +} + +void __init_memblock memblock_trim_memory(phys_addr_t align) +{ + phys_addr_t start, end, orig_start, orig_end; + struct memblock_region *r; + + for_each_mem_region(r) { + orig_start = r->base; + orig_end = r->base + r->size; + start = round_up(orig_start, align); + end = round_down(orig_end, align); + + if (start == orig_start && end == orig_end) + continue; + + if (start < end) { + r->base = start; + r->size = end - start; + } else { + memblock_remove_region(&memblock.memory, + r - memblock.memory.regions); + r--; + } + } +} + +void __init_memblock memblock_set_current_limit(phys_addr_t limit) +{ + memblock.current_limit = limit; +} + +phys_addr_t __init_memblock memblock_get_current_limit(void) +{ + return memblock.current_limit; +} + +static void __init_memblock memblock_dump(struct memblock_type *type) +{ + phys_addr_t base, end, size; + enum memblock_flags flags; + int idx; + struct memblock_region *rgn; + + pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); + + for_each_memblock_type(idx, type, rgn) { + char nid_buf[32] = ""; + + base = rgn->base; + size = rgn->size; + end = base + size - 1; + flags = rgn->flags; +#ifdef CONFIG_NUMA + if (memblock_get_region_node(rgn) != MAX_NUMNODES) + snprintf(nid_buf, sizeof(nid_buf), " on node %d", + memblock_get_region_node(rgn)); +#endif + pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", + type->name, idx, &base, &end, &size, nid_buf, flags); + } +} + +static void __init_memblock __memblock_dump_all(void) +{ + pr_info("MEMBLOCK configuration:\n"); + pr_info(" memory size = %pa reserved size = %pa\n", + &memblock.memory.total_size, + &memblock.reserved.total_size); + + memblock_dump(&memblock.memory); + memblock_dump(&memblock.reserved); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP + memblock_dump(&physmem); +#endif +} + +void __init_memblock 
memblock_dump_all(void) +{ + if (memblock_debug) + __memblock_dump_all(); +} + +void __init memblock_allow_resize(void) +{ + memblock_can_resize = 1; +} + +static int __init early_memblock(char *p) +{ + if (p && strstr(p, "debug")) + memblock_debug = 1; + return 0; +} +early_param("memblock", early_memblock); + +static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *start_pg, *end_pg; + phys_addr_t pg, pgend; + + /* + * Convert start_pfn/end_pfn to a struct page pointer. + */ + start_pg = pfn_to_page(start_pfn - 1) + 1; + end_pg = pfn_to_page(end_pfn - 1) + 1; + + /* + * Convert to physical addresses, and round start upwards and end + * downwards. + */ + pg = PAGE_ALIGN(__pa(start_pg)); + pgend = __pa(end_pg) & PAGE_MASK; + + /* + * If there are free pages between these, free the section of the + * memmap array. + */ + if (pg < pgend) + memblock_phys_free(pg, pgend - pg); +} + +/* + * The mem_map array can get very big. Free the unused area of the memory map. + */ +static void __init free_unused_memmap(void) +{ + unsigned long start, end, prev_end = 0; + int i; + + if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || + IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + return; + + /* + * This relies on each bank being in address order. + * The banks are sorted previously in bootmem_init(). + */ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); +#endif + /* + * Align down here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock + */ + start = pageblock_start_pfn(start); + + /* + * If we had a previous bank, and there is a space + * between the current bank and the previous, free it. 
+ */ + if (prev_end && prev_end < start) + free_memmap(prev_end, start); + + /* + * Align up here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock + */ + prev_end = pageblock_align(end); + } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { + prev_end = pageblock_align(end); + free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); + } +#endif +} + +static void __init __free_pages_memory(unsigned long start, unsigned long end) +{ + int order; + + while (start < end) { + /* + * Free the pages in the largest chunks alignment allows. + * + * __ffs() behaviour is undefined for 0. start == 0 is + * MAX_ORDER-aligned, set order to MAX_ORDER for the case. + */ + if (start) + order = min_t(int, MAX_ORDER, __ffs(start)); + else + order = MAX_ORDER; + + while (start + (1UL << order) > end) + order--; + + memblock_free_pages(pfn_to_page(start), start, order); + + start += (1UL << order); + } +} + +static unsigned long __init __free_memory_core(phys_addr_t start, + phys_addr_t end) +{ + unsigned long start_pfn = PFN_UP(start); + unsigned long end_pfn = min_t(unsigned long, + PFN_DOWN(end), max_low_pfn); + + if (start_pfn >= end_pfn) + return 0; + + __free_pages_memory(start_pfn, end_pfn); + + return end_pfn - start_pfn; +} + +static void __init memmap_init_reserved_pages(void) +{ + struct memblock_region *region; + phys_addr_t start, end; + u64 i; + + /* initialize struct pages for the reserved regions */ + for_each_reserved_mem_range(i, &start, &end) + reserve_bootmem_region(start, end); + + /* and also treat struct pages for the NOMAP regions as PageReserved */ + for_each_mem_region(region) { + if (memblock_is_nomap(region)) { + start = region->base; + end = start + region->size; + reserve_bootmem_region(start, end); + } + } +} + +static unsigned long __init free_low_memory_core_early(void) +{ + unsigned long count = 0; + phys_addr_t start, end; + u64 i; + + 
memblock_clear_hotplug(0, -1); + + memmap_init_reserved_pages(); + + /* + * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id + * because in some case like Node0 doesn't have RAM installed + * low ram will be on Node1 + */ + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, + NULL) + count += __free_memory_core(start, end); + + return count; +} + +static int reset_managed_pages_done __initdata; + +void reset_node_managed_pages(pg_data_t *pgdat) +{ + struct zone *z; + + for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) + atomic_long_set(&z->managed_pages, 0); +} + +void __init reset_all_zones_managed_pages(void) +{ + struct pglist_data *pgdat; + + if (reset_managed_pages_done) + return; + + for_each_online_pgdat(pgdat) + reset_node_managed_pages(pgdat); + + reset_managed_pages_done = 1; +} + +/** + * memblock_free_all - release free pages to the buddy allocator + */ +void __init memblock_free_all(void) +{ + unsigned long pages; + + free_unused_memmap(); + reset_all_zones_managed_pages(); + + pages = free_low_memory_core_early(); + totalram_pages_add(pages); +} + +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) + +static int memblock_debug_show(struct seq_file *m, void *private) +{ + struct memblock_type *type = m->private; + struct memblock_region *reg; + int i; + phys_addr_t end; + + for (i = 0; i < type->cnt; i++) { + reg = &type->regions[i]; + end = reg->base + reg->size - 1; + + seq_printf(m, "%4d: ", i); + seq_printf(m, "%pa..%pa\n", &reg->base, &end); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(memblock_debug); + +static int __init memblock_init_debugfs(void) +{ + struct dentry *root = debugfs_create_dir("memblock", NULL); + + debugfs_create_file("memory", 0444, root, + &memblock.memory, &memblock_debug_fops); + debugfs_create_file("reserved", 0444, root, + &memblock.reserved, &memblock_debug_fops); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP + debugfs_create_file("physmem", 0444, root, &physmem, +
&memblock_debug_fops); +#endif + + return 0; +} +__initcall(memblock_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/mm/memblock.c b/mm/memblock.c index 67aac85dbfe55..5f3ebbb04935c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -147,7 +147,6 @@ struct memblock_type physmem = { * pointer will be reset to NULL at memblock_discard() */ static __refdata struct memblock_type *memblock_memory = &memblock.memory; - #define for_each_memblock_type(i, memblock_type, rgn) \ for (i = 0, rgn = &memblock_type->regions[0]; \ i < memblock_type->cnt; \ @@ -171,6 +170,13 @@ static enum memblock_flags __init_memblock choose_memblock_flags(void) } /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +/* IAMROOT20 20240309 + * ex) PHYS_ADDR_MAX = 0xffff_ffff_ffff_ffff + base = 0xffff_ffff_ffff_fff0 + size = 0x100 + return size = min(0x100, 0xf) = 0xf; + (base + size)가 overflow가 발생하면 (PHYS_ADDR_MAX - base)를 size로 재설정 + */ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) { return *size = min(*size, PHYS_ADDR_MAX - base); @@ -508,6 +514,9 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, * @end_rgn: end scanning at (@end_rgn - 1) * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn) */ +/* IAMROOT20 20240309 + * (start_rgn - 1) index부터 end_rgn index미만까지 merge + */ static void __init_memblock memblock_merge_regions(struct memblock_type *type, unsigned long start_rgn, unsigned long end_rgn) @@ -524,13 +533,22 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type, memblock_get_region_node(this) != memblock_get_region_node(next) || this->flags != next->flags) { + /* IAMROOT20 20240309 + * this region과 next region이 인접하지 않거나 같은 node가 아니면 merge할 수 없으므로 다음 region을 확인하러 넘어감 + */ BUG_ON(this->base + this->size > next->base); i++; continue; } + /* IAMROOT20 20240309 + * this region과 next region을 merge + */ this->size += next->size; /* move 
forward from next + 1, index of which is i + 2 */ + /* IAMROOT20 20240309 + * (next + 1)부터 (type->cnt - (i + 2))개를 앞으로 한 칸씩 이동시킴 + */ memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); type->cnt--; end_rgn--; @@ -549,6 +567,9 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type, * Insert new memblock region [@base, @base + @size) into @type at @idx. * @type must already have extra room to accommodate the new region. */ +/* IAMROOT20 20240309 + * [base, base + size) 영역을 type->regions[idx] 위치에 insert + */ static void __init_memblock memblock_insert_region(struct memblock_type *type, int idx, phys_addr_t base, phys_addr_t size, @@ -558,7 +579,13 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, struct memblock_region *rgn = &type->regions[idx]; BUG_ON(type->cnt >= type->max); + /* IAMROOT20 20240309 + * type->regions[idx]부터 뒤로 한 칸씩 이동시킴 + */ memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); + /* IAMROOT20 20240309 + * type->regions[idx] 위치에 new region을 insert + */ rgn->base = base; rgn->size = size; rgn->flags = flags; @@ -583,6 +610,10 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, * Return: * 0 on success, -errno on failure.
*/ +/* IAMROOT20_START 20240309 */ +/* IAMROOT20 20240309 + * 참고: http://jake.dothome.co.kr/memblock-1/ + */ static int __init_memblock memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int nid, enum memblock_flags flags) @@ -597,6 +628,10 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, return 0; /* special case for empty array */ + /* IAMROOT20 20240309 + * type에 등록된 regions이 없을 경우 + * new region을 등록 후 함수 종료 + */ if (type->regions[0].size == 0) { WARN_ON(type->cnt != 1 || type->total_size); type->regions[0].base = base; @@ -614,9 +649,17 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * that there is enough empty regions in @type, and we can insert * regions directly. */ + /* IAMROOT20 20240309 + * 현재 최대로 넣을 수 있는 region 개수는 type->max인데 + * 최악의 경우인 (type->cnt * 2 + 1)보다 type->max가 크거나 같으면 insert를 true로 설정하여 repeat을 방지함 + */ if (type->cnt * 2 + 1 <= type->max) insert = true; +/* IAMROOT20 20240309 + * insert가 false이면 먼저 정확한 new_region이 필요한 영역을 확인하고 insert를 true로 변경 + * insert가 true이면 repeat을 다시 실행하지 않고 실제 new region을 insert + */ repeat: /* * The following is executed twice. Once with %false @insert and @@ -626,18 +669,30 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, base = obase; nr_new = 0; + /* IAMROOT20 20240309 + * 기존에 존재하는 region을 순회 + */ for_each_memblock_type(idx, type, rgn) { phys_addr_t rbase = rgn->base; phys_addr_t rend = rbase + rgn->size; + /* IAMROOT20 20240309 + * (rbase >= end)일 경우 더 이상 new region이 기존 region과 겹치지 않으므로 반복문 종료 + */ if (rbase >= end) break; + /* IAMROOT20 20240309 + * (rend <= base)일 경우 new region이 현재 비교하고 있는 region과 겹치지 않으므로 다음 region으로 진행 + */ if (rend <= base) continue; /* * @rgn overlaps. If it separates the lower part of new * area, insert that portion.
*/ + /* IAMROOT20 20240309 + * (rbase > base)일 경우 new region의 하위 부분과 현재 비교하고 있는 region이 겹치므로 new region을 하위 부분인 (base ~ rbase) 영역를 insert + */ if (rbase > base) { #ifdef CONFIG_NUMA WARN_ON(nid != memblock_get_region_node(rgn)); @@ -654,10 +709,16 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, } } /* area below @rend is dealt with, forget about it */ + /* IAMROOT20 20240309 + * base를 새롭게 설정 + */ base = min(rend, end); } /* insert the remaining portion */ + /* IAMROOT20 20240309 + * 나머지 (base ~ end) 영역을 insert + */ if (base < end) { nr_new++; if (insert) { @@ -669,6 +730,9 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, } } + /* IAMROOT20 20240309 + * insert 해야하는 new region이 없으면 함수 종료 + */ if (!nr_new) return 0; @@ -677,12 +741,18 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * insertions; otherwise, merge and return. */ if (!insert) { + /* IAMROOT20 20240309 + * insert 해야될 new region이 max region size보다 크면 memblock_double_array를 호출하여 memblock.type의 사이즈를 2배 증가시킴 + */ while (type->cnt + nr_new > type->max) if (memblock_double_array(type, obase, size) < 0) return -ENOMEM; insert = true; goto repeat; } else { + /* IAMROOT20 20240309 + * 모든 insert 작업이 끝나고 인접한 region들끼리 merge를 수행 + */ memblock_merge_regions(type, start_rgn, end_rgn); return 0; } From a4f154c62bbf9fb755f322173fc37f4fd06a6727 Mon Sep 17 00:00:00 2001 From: SoominCho Date: Wed, 13 Mar 2024 12:49:58 +0000 Subject: [PATCH 044/104] IAMROOT20 20240309 Add comments to memblock_remove --- include/linux/memblock.h | 3 ++ mm/memblock.c | 70 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f82ee3fac1cdf..d34b509cd4057 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -340,6 +340,9 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); * Walks over free (memory && 
!reserved) areas of memblock. Available as * soon as memblock is initialized. */ +/* IAMROOT20 20240309 + * 루프를 돌며 memory 영역에서 reserved 영역을 제외한 영역인 free 메모리 영역을 알아옴 + */ #define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \ __for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ nid, flags, p_start, p_end, p_nid) diff --git a/mm/memblock.c b/mm/memblock.c index 5f3ebbb04935c..e37f705e2ba8c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -227,12 +227,24 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, { phys_addr_t this_start, this_end, cand; u64 i; - + + /* IAMROOT20 20240309 + * 빈 memblock 공간을 루프를 돌며 하나씩 알아옴 + */ for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { + /* IAMROOT20 20240309 + * start ~ end 범위를 벗어나면 조정 + */ this_start = clamp(this_start, start, end); this_end = clamp(this_end, start, end); + /* IAMROOT20 20240309 + * 요청 사이즈의 비교를 align된 크기로 하기 위해 올림 + */ cand = round_up(this_start, align); + /* IAMROOT20 20240309 + * 알아온 free 영역의 범위에 size가 포함될 수 있으면 cand 주소를 리턴 + */ if (cand < this_end && this_end - cand >= size) return cand; } @@ -340,6 +352,9 @@ static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, ret = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); + /* IAMROOT20 20240309 + * 미러 플래그가 요청된 상태에서 공간을 찾지 못한 경우 미러 플래그를 제거하고 다시 공간을 찾음 + */ if (!ret && (flags & MEMBLOCK_MIRROR)) { pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", &size); @@ -456,9 +471,15 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, if (type != &memblock.reserved) new_area_start = new_area_size = 0; + /* IAMROOT20 20240309 + * 새로 관리 영역을 할당받을 공간은 추가 요청 영역을 피해야 하므로 요청 영역의 상부를 먼저 검색 + */ addr = memblock_find_in_range(new_area_start + new_area_size, memblock.current_limit, new_alloc_size, PAGE_SIZE); + /* IAMROOT20 20240309 + * 첫 번째 검색에서 할당받지 못 하고 요청 타입이 reserved인 경우에 추가 요청 영역을 피해 하부를 검색 + */ if (!addr && 
new_area_size) addr = memblock_find_in_range(0, min(new_area_start, memblock.current_limit), @@ -822,6 +843,9 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) * Return: * 0 on success, -errno on failure. */ +/* IAMROOT20 20240309 + * 제거할 영역만 분리하기 region을 나눔 + */ static int __init_memblock memblock_isolate_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int *start_rgn, int *end_rgn) @@ -836,6 +860,10 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, return 0; /* we'll create at most two more regions */ + /* IAMROOT20 20240309 + * 기존 region수에 위아래 최대로 추가될 수 있는 2개의 region수를 더한 값이 max region size보다 크면 + * memblock_double_array를 호출하여 memblock.type의 사이즈를 2배씩 증가시킴 + */ while (type->cnt + 2 > type->max) if (memblock_double_array(type, base, size) < 0) return -ENOMEM; @@ -844,11 +872,21 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, phys_addr_t rbase = rgn->base; phys_addr_t rend = rbase + rgn->size; + /* IAMROOT20 20240309 + * (rbase >= end)일 경우 분리할 영역이 기존 region과 겹치지 않으므로 반복문 종료 + */ if (rbase >= end) break; + /* IAMROOT20 20240309 + * (rend <= base)일 경우 분리할 영역이 현재 비교하고 있는 region과 겹치지 않으므로 다음 region으로 진행 + */ if (rend <= base) continue; + /* IAMROOT20 20240309 + * (rbase < base)일 경우 분리할 영역의 하위 부분과 현재 비교하고 있는 region의 상위 부분이 겹치므로 + * 비교하는 region의 겹치지 않는 하위 부분인 (rbase ~ base)영역을 분리해 new region으로 insert + */ if (rbase < base) { /* * @rgn intersects from below. Split and continue @@ -860,6 +898,11 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type, memblock_insert_region(type, idx, rbase, base - rbase, memblock_get_region_node(rgn), rgn->flags); + /* IAMROOT20 20240309 + * (rend > end)일 경우 분리할 영역의 상위 부분과 현재 비교하고 있는 region의 하위 부분이 겹치므로 + * 비교하는 region의 겹치는 하위 부분인 (rbase ~ end)영역을 분리해 new region으로 insert + * region추가 후 idx를 하나 줄여 다음 루프에서 제거할 region을 가리키도록 함 + */ } else if (rend > end) { /* * @rgn intersects from above. 
Split and redo the @@ -892,6 +935,9 @@ static int __init_memblock memblock_remove_range(struct memblock_type *type, if (ret) return ret; + /* IAMROOT20 20240309 + * 분리된 region을 제거 + */ for (i = end_rgn - 1; i >= start_rgn; i--) memblock_remove_region(type, i); return 0; @@ -906,6 +952,7 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) return memblock_remove_range(&memblock.memory, base, size); } +/* IAMROOT20_END 20240309 */ /** * memblock_free - free boot memory allocation @@ -1138,6 +1185,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid) { + /* IAMROOT20 20240309 + * idx 값을 절반으로 나누어 lsb: idx_a의 카운터,, msb: idx_b의 카운터 + */ int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; @@ -1155,6 +1205,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, if (should_skip_region(type_a, m, nid, flags)) continue; + /* IAMROOT20 20240309 + * type_b에 대한 영역이 지정되지 않으면(null) 현재 1차 루프 인덱스의 memblock에 대한 영역을 반환 + */ if (!type_b) { if (out_start) *out_start = m_start; @@ -1174,6 +1227,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, phys_addr_t r_end; r = &type_b->regions[idx_b]; + /* IAMROOT20 20240309 + * idx_b가 0보다 크면 현재 이전 memblock의 끝 주소를 가리키고 idx_b가 0이면 0번 주소를 지정 + */ r_start = idx_b ? r[-1].base + r[-1].size : 0; r_end = idx_b < type_b->cnt ? 
r->base : PHYS_ADDR_MAX; @@ -1182,9 +1238,17 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * if idx_b advanced past idx_a, * break out to advance idx_a */ + /* IAMROOT20 20240309 + * reserve memblock 영역이 memory memblock 영역을 벗어난 경우 2차 루프를 빠져나가서 다음 memory memblock을 준비 + */ if (r_start >= m_end) break; /* if the two regions intersect, we're done */ + /* IAMROOT20 20240309 + * 두 영역이 교차하는 경우 + * out_start에 하단 reserve 영역값의 끝 주소나 memory 영역값의 시작 주소중 가장 큰 주소 + * out_end에 상단 reserve 영역값의 시작 주소나 memory 영역값의 끝 주소중에 가장 작은 주소 + */ if (m_start < r_end) { if (out_start) *out_start = @@ -1197,6 +1261,10 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * The region which ends first is * advanced for the next iteration. */ + /* IAMROOT20 20240309 + * reserve 영역의 끝 주소가 memory 영역의 끝주소와 비교하여 큰 경우 idx_a를 증가, 다음 memory 영역을 준비 + * 크지 않은 경우 idx_b를 증가, 다음 reserve 영역을 준비 + */ if (m_end <= r_end) idx_a++; else From 2e5b930e0ea64598c7af984d4106522f2c740e29 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 16 Mar 2024 22:03:14 +0900 Subject: [PATCH 045/104] IAMROOT20 20240316 - Add comments Signed-off-by: Daero Lee --- 1 | 2222 ------------------------------------------- drivers/of/fdt.c | 3 +- kernel/jump_label.c | 5 + 3 files changed, 7 insertions(+), 2223 deletions(-) delete mode 100644 1 diff --git a/1 b/1 deleted file mode 100644 index 1dfd416f16e1a..0000000000000 --- a/1 +++ /dev/null @@ -1,2222 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Procedures for maintaining information about logical memory blocks. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "internal.h" - -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_PHYSMEM_REGIONS 4 - -#ifndef INIT_MEMBLOCK_RESERVED_REGIONS -#define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS -#endif - -#ifndef INIT_MEMBLOCK_MEMORY_REGIONS -#define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS -#endif - -/** - * DOC: memblock overview - * - * Memblock is a method of managing memory regions during the early - * boot period when the usual kernel memory allocators are not up and - * running. - * - * Memblock views the system memory as collections of contiguous - * regions. There are several types of these collections: - * - * * ``memory`` - describes the physical memory available to the - * kernel; this may differ from the actual physical memory installed - * in the system, for instance when the memory is restricted with - * ``mem=`` command line parameter - * * ``reserved`` - describes the regions that were allocated - * * ``physmem`` - describes the actual physical memory available during - * boot regardless of the possible restrictions and memory hot(un)plug; - * the ``physmem`` type is only available on some architectures. - * - * Each region is represented by struct memblock_region that - * defines the region extents, its attributes and NUMA node id on NUMA - * systems. Every memory type is described by the struct memblock_type - * which contains an array of memory regions along with - * the allocator metadata. The "memory" and "reserved" types are nicely - * wrapped with struct memblock. This structure is statically - * initialized at build time. The region arrays are initially sized to - * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and - * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array - * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS. 
- * The memblock_allow_resize() enables automatic resizing of the region - * arrays during addition of new regions. This feature should be used - * with care so that memory allocated for the region array will not - * overlap with areas that should be reserved, for example initrd. - * - * The early architecture setup should tell memblock what the physical - * memory layout is by using memblock_add() or memblock_add_node() - * functions. The first function does not assign the region to a NUMA - * node and it is appropriate for UMA systems. Yet, it is possible to - * use it on NUMA systems as well and assign the region to a NUMA node - * later in the setup process using memblock_set_node(). The - * memblock_add_node() performs such an assignment directly. - * - * Once memblock is setup the memory can be allocated using one of the - * API variants: - * - * * memblock_phys_alloc*() - these functions return the **physical** - * address of the allocated memory - * * memblock_alloc*() - these functions return the **virtual** address - * of the allocated memory. - * - * Note, that both API variants use implicit assumptions about allowed - * memory ranges and the fallback methods. Consult the documentation - * of memblock_alloc_internal() and memblock_alloc_range_nid() - * functions for more elaborate description. - * - * As the system boot progresses, the architecture specific mem_init() - * function frees all the memory to the buddy page allocator. - * - * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the - * memblock data structures (except "physmem") will be discarded after the - * system initialization completes. 
- */ - -#ifndef CONFIG_NUMA -struct pglist_data __refdata contig_page_data; -EXPORT_SYMBOL(contig_page_data); -#endif - -unsigned long max_low_pfn; -unsigned long min_low_pfn; -unsigned long max_pfn; -unsigned long long max_possible_pfn; - -/* IAMROOT20 20240224 - * INIT_MEMBLOCK_MEMORY_REGIONS = 128 * 8 - * INIT_MEMBLOCK_RESERVED_REGIONS = 128 + 256 + 1 - */ -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS]; -#endif - -struct memblock memblock __initdata_memblock = { - .memory.regions = memblock_memory_init_regions, - .memory.cnt = 1, /* empty dummy entry */ - .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS, - .memory.name = "memory", - - .reserved.regions = memblock_reserved_init_regions, - .reserved.cnt = 1, /* empty dummy entry */ - .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, - .reserved.name = "reserved", - - .bottom_up = false, - .current_limit = MEMBLOCK_ALLOC_ANYWHERE, -}; - -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -struct memblock_type physmem = { - .regions = memblock_physmem_init_regions, - .cnt = 1, /* empty dummy entry */ - .max = INIT_PHYSMEM_REGIONS, - .name = "physmem", -}; -#endif - -/* - * keep a pointer to &memblock.memory in the text section to use it in - * __next_mem_range() and its helpers. - * For architectures that do not keep memblock data after init, this - * pointer will be reset to NULL at memblock_discard() - */ -static __refdata struct memblock_type *memblock_memory = &memblock.memory; -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = &memblock_type->regions[0]; \ - i < memblock_type->cnt; \ - i++, rgn = &memblock_type->regions[i]) - -#define memblock_dbg(fmt, ...) 
\ - do { \ - if (memblock_debug) \ - pr_info(fmt, ##__VA_ARGS__); \ - } while (0) - -static int memblock_debug __initdata_memblock; -static bool system_has_some_mirror __initdata_memblock = false; -static int memblock_can_resize __initdata_memblock; -static int memblock_memory_in_slab __initdata_memblock = 0; -static int memblock_reserved_in_slab __initdata_memblock = 0; - -static enum memblock_flags __init_memblock choose_memblock_flags(void) -{ - return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE; -} - -/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ -/* IAMROOT20 20240309 - * ex) PHYS_ADDR_MAX = 0xffff_ffff_ffff_ffff - base = 0xffff_ffff_ffff_fff0 - size = 0x100 - return size = min(0x100, 0xf) = 0xf; - (base + size)가 overflow가 발생하면 (PHYS_ADDR_MAX - base)를 size로 재설정 - */ -static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) -{ - return *size = min(*size, PHYS_ADDR_MAX - base); -} - -/* - * Address comparison utilities - */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - -bool __init_memblock memblock_overlaps_region(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) -{ - unsigned long i; - - memblock_cap_size(base, &size); - - for (i = 0; i < type->cnt; i++) - if (memblock_addrs_overlap(base, size, type->regions[i].base, - type->regions[i].size)) - break; - return i < type->cnt; -} - -/** - * __memblock_find_range_bottom_up - find free area utility in bottom-up - * @start: start of candidate range - * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or - * %MEMBLOCK_ALLOC_ACCESSIBLE - * @size: size of free area to find - * @align: alignment of free area to find - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * @flags: pick from blocks based on memory attributes - * - * Utility 
called from memblock_find_in_range_node(), find free area bottom-up. - * - * Return: - * Found address on success, 0 on failure. - */ -static phys_addr_t __init_memblock -__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align, int nid, - enum memblock_flags flags) -{ - phys_addr_t this_start, this_end, cand; - u64 i; - - for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) { - this_start = clamp(this_start, start, end); - this_end = clamp(this_end, start, end); - - cand = round_up(this_start, align); - if (cand < this_end && this_end - cand >= size) - return cand; - } - - return 0; -} - -/** - * __memblock_find_range_top_down - find free area utility, in top-down - * @start: start of candidate range - * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or - * %MEMBLOCK_ALLOC_ACCESSIBLE - * @size: size of free area to find - * @align: alignment of free area to find - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * @flags: pick from blocks based on memory attributes - * - * Utility called from memblock_find_in_range_node(), find free area top-down. - * - * Return: - * Found address on success, 0 on failure. 
- */ -static phys_addr_t __init_memblock -__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align, int nid, - enum memblock_flags flags) -{ - phys_addr_t this_start, this_end, cand; - u64 i; - - for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end, - NULL) { - this_start = clamp(this_start, start, end); - this_end = clamp(this_end, start, end); - - if (this_end < size) - continue; - - cand = round_down(this_end - size, align); - if (cand >= this_start) - return cand; - } - - return 0; -} - -/** - * memblock_find_in_range_node - find free area in given range and node - * @size: size of free area to find - * @align: alignment of free area to find - * @start: start of candidate range - * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or - * %MEMBLOCK_ALLOC_ACCESSIBLE - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * @flags: pick from blocks based on memory attributes - * - * Find @size free area aligned to @align in the specified range and node. - * - * Return: - * Found address on success, 0 on failure. 
- */ -static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, - phys_addr_t align, phys_addr_t start, - phys_addr_t end, int nid, - enum memblock_flags flags) -{ - /* pump up @end */ - if (end == MEMBLOCK_ALLOC_ACCESSIBLE || - end == MEMBLOCK_ALLOC_NOLEAKTRACE) - end = memblock.current_limit; - - /* avoid allocating the first page */ - start = max_t(phys_addr_t, start, PAGE_SIZE); - end = max(start, end); - - if (memblock_bottom_up()) - return __memblock_find_range_bottom_up(start, end, size, align, - nid, flags); - else - return __memblock_find_range_top_down(start, end, size, align, - nid, flags); -} - -/** - * memblock_find_in_range - find free area in given range - * @start: start of candidate range - * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or - * %MEMBLOCK_ALLOC_ACCESSIBLE - * @size: size of free area to find - * @align: alignment of free area to find - * - * Find @size free area aligned to @align in the specified range. - * - * Return: - * Found address on success, 0 on failure. 
- */ -static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, - phys_addr_t end, phys_addr_t size, - phys_addr_t align) -{ - phys_addr_t ret; - enum memblock_flags flags = choose_memblock_flags(); - -again: - ret = memblock_find_in_range_node(size, align, start, end, - NUMA_NO_NODE, flags); - - if (!ret && (flags & MEMBLOCK_MIRROR)) { - pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", - &size); - flags &= ~MEMBLOCK_MIRROR; - goto again; - } - - return ret; -} - -static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) -{ - type->total_size -= type->regions[r].size; - memmove(&type->regions[r], &type->regions[r + 1], - (type->cnt - (r + 1)) * sizeof(type->regions[r])); - type->cnt--; - - /* Special case for empty arrays */ - if (type->cnt == 0) { - WARN_ON(type->total_size != 0); - type->cnt = 1; - type->regions[0].base = 0; - type->regions[0].size = 0; - type->regions[0].flags = 0; - memblock_set_region_node(&type->regions[0], MAX_NUMNODES); - } -} - -#ifndef CONFIG_ARCH_KEEP_MEMBLOCK -/** - * memblock_discard - discard memory and reserved arrays if they were allocated - */ -void __init memblock_discard(void) -{ - phys_addr_t addr, size; - - if (memblock.reserved.regions != memblock_reserved_init_regions) { - addr = __pa(memblock.reserved.regions); - size = PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.reserved.max); - if (memblock_reserved_in_slab) - kfree(memblock.reserved.regions); - else - memblock_free_late(addr, size); - } - - if (memblock.memory.regions != memblock_memory_init_regions) { - addr = __pa(memblock.memory.regions); - size = PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.memory.max); - if (memblock_memory_in_slab) - kfree(memblock.memory.regions); - else - memblock_free_late(addr, size); - } - - memblock_memory = NULL; -} -#endif - -/** - * memblock_double_array - double the size of the memblock regions array - * @type: memblock type of the 
regions array being doubled - * @new_area_start: starting address of memory range to avoid overlap with - * @new_area_size: size of memory range to avoid overlap with - * - * Double the size of the @type regions array. If memblock is being used to - * allocate memory for a new reserved regions array and there is a previously - * allocated memory range [@new_area_start, @new_area_start + @new_area_size] - * waiting to be reserved, ensure the memory used by the new array does - * not overlap. - * - * Return: - * 0 on success, -1 on failure. - */ -static int __init_memblock memblock_double_array(struct memblock_type *type, - phys_addr_t new_area_start, - phys_addr_t new_area_size) -{ - struct memblock_region *new_array, *old_array; - phys_addr_t old_alloc_size, new_alloc_size; - phys_addr_t old_size, new_size, addr, new_end; - int use_slab = slab_is_available(); - int *in_slab; - - /* We don't allow resizing until we know about the reserved regions - * of memory that aren't suitable for allocation - */ - if (!memblock_can_resize) - return -1; - - /* Calculate new doubled size */ - old_size = type->max * sizeof(struct memblock_region); - new_size = old_size << 1; - /* - * We need to allocated new one align to PAGE_SIZE, - * so we can free them completely later. - */ - old_alloc_size = PAGE_ALIGN(old_size); - new_alloc_size = PAGE_ALIGN(new_size); - - /* Retrieve the slab flag */ - if (type == &memblock.memory) - in_slab = &memblock_memory_in_slab; - else - in_slab = &memblock_reserved_in_slab; - - /* Try to find some space for it */ - if (use_slab) { - new_array = kmalloc(new_size, GFP_KERNEL); - addr = new_array ? 
__pa(new_array) : 0; - } else { - /* only exclude range when trying to double reserved.regions */ - if (type != &memblock.reserved) - new_area_start = new_area_size = 0; - - addr = memblock_find_in_range(new_area_start + new_area_size, - memblock.current_limit, - new_alloc_size, PAGE_SIZE); - if (!addr && new_area_size) - addr = memblock_find_in_range(0, - min(new_area_start, memblock.current_limit), - new_alloc_size, PAGE_SIZE); - - new_array = addr ? __va(addr) : NULL; - } - if (!addr) { - pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", - type->name, type->max, type->max * 2); - return -1; - } - - new_end = addr + new_size - 1; - memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]", - type->name, type->max * 2, &addr, &new_end); - - /* - * Found space, we now need to move the array over before we add the - * reserved region since it may be our reserved array itself that is - * full. - */ - memcpy(new_array, type->regions, old_size); - memset(new_array + type->max, 0, old_size); - old_array = type->regions; - type->regions = new_array; - type->max <<= 1; - - /* Free old array. We needn't free it if the array is the static one */ - if (*in_slab) - kfree(old_array); - else if (old_array != memblock_memory_init_regions && - old_array != memblock_reserved_init_regions) - memblock_free(old_array, old_alloc_size); - - /* - * Reserve the new array if that comes from the memblock. 
Otherwise, we - * needn't do it - */ - if (!use_slab) - BUG_ON(memblock_reserve(addr, new_alloc_size)); - - /* Update slab flag */ - *in_slab = use_slab; - - return 0; -} - -/** - * memblock_merge_regions - merge neighboring compatible regions - * @type: memblock type to scan - * @start_rgn: start scanning from (@start_rgn - 1) - * @end_rgn: end scanning at (@end_rgn - 1) - * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn) - */ -static void __init_memblock memblock_merge_regions(struct memblock_type *type, - unsigned long start_rgn, - unsigned long end_rgn) -{ - int i = 0; - if (start_rgn) - i = start_rgn - 1; - end_rgn = min(end_rgn, type->cnt - 1); - while (i < end_rgn) { - struct memblock_region *this = &type->regions[i]; - struct memblock_region *next = &type->regions[i + 1]; - - if (this->base + this->size != next->base || - memblock_get_region_node(this) != - memblock_get_region_node(next) || - this->flags != next->flags) { - BUG_ON(this->base + this->size > next->base); - i++; - continue; - } - - this->size += next->size; - /* move forward from next + 1, index of which is i + 2 */ - memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); - type->cnt--; - end_rgn--; - } -} - -/** - * memblock_insert_region - insert new memblock region - * @type: memblock type to insert into - * @idx: index for the insertion point - * @base: base address of the new region - * @size: size of the new region - * @nid: node id of the new region - * @flags: flags of the new region - * - * Insert new memblock region [@base, @base + @size) into @type at @idx. - * @type must already have extra room to accommodate the new region. 
- */ -static void __init_memblock memblock_insert_region(struct memblock_type *type, - int idx, phys_addr_t base, - phys_addr_t size, - int nid, - enum memblock_flags flags) -{ - struct memblock_region *rgn = &type->regions[idx]; - - BUG_ON(type->cnt >= type->max); - memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); - rgn->base = base; - rgn->size = size; - rgn->flags = flags; - memblock_set_region_node(rgn, nid); - type->cnt++; - type->total_size += size; -} - -/** - * memblock_add_range - add new memblock region - * @type: memblock type to add new region into - * @base: base address of the new region - * @size: size of the new region - * @nid: nid of the new region - * @flags: flags of the new region - * - * Add new memblock region [@base, @base + @size) into @type. The new region - * is allowed to overlap with existing ones - overlaps don't affect already - * existing regions. @type is guaranteed to be minimal (all neighbouring - * compatible regions are merged) after the addition. - * - * Return: - * 0 on success, -errno on failure. - */ -/* IAMROOT20_START 20240309 */ -static int __init_memblock memblock_add_range(struct memblock_type *type, - phys_addr_t base, phys_addr_t size, - int nid, enum memblock_flags flags) -{ - bool insert = false; - phys_addr_t obase = base; - phys_addr_t end = base + memblock_cap_size(base, &size); - int idx, nr_new, start_rgn = -1, end_rgn; - struct memblock_region *rgn; - - if (!size) - return 0; - - /* special case for empty array */ - /* IAMROOT20 20240309 - * type에 등록된 regions이 없을 경우 - * new region을 등록 후 함수 종료 - */ - if (type->regions[0].size == 0) { - WARN_ON(type->cnt != 1 || type->total_size); - type->regions[0].base = base; - type->regions[0].size = size; - type->regions[0].flags = flags; - memblock_set_region_node(&type->regions[0], nid); - type->total_size = size; - return 0; - } - - /* - * The worst case is when new range overlaps all existing regions, - * then we'll need type->cnt + 1 empty regions in @type. 
So if - * type->cnt * 2 + 1 is less than or equal to type->max, we know - * that there is enough empty regions in @type, and we can insert - * regions directly. - */ - /* IAMROOT20 20240309 - * - */ - if (type->cnt * 2 + 1 <= type->max) - insert = true; - -repeat: - /* - * The following is executed twice. Once with %false @insert and - * then with %true. The first counts the number of regions needed - * to accommodate the new area. The second actually inserts them. - */ - base = obase; - nr_new = 0; - - for_each_memblock_type(idx, type, rgn) { - phys_addr_t rbase = rgn->base; - phys_addr_t rend = rbase + rgn->size; - - if (rbase >= end) - break; - if (rend <= base) - continue; - /* - * @rgn overlaps. If it separates the lower part of new - * area, insert that portion. - */ - if (rbase > base) { -#ifdef CONFIG_NUMA - WARN_ON(nid != memblock_get_region_node(rgn)); -#endif - WARN_ON(flags != rgn->flags); - nr_new++; - if (insert) { - if (start_rgn == -1) - start_rgn = idx; - end_rgn = idx + 1; - memblock_insert_region(type, idx++, base, - rbase - base, nid, - flags); - } - } - /* area below @rend is dealt with, forget about it */ - base = min(rend, end); - } - - /* insert the remaining portion */ - if (base < end) { - nr_new++; - if (insert) { - if (start_rgn == -1) - start_rgn = idx; - end_rgn = idx + 1; - memblock_insert_region(type, idx, base, end - base, - nid, flags); - } - } - - if (!nr_new) - return 0; - - /* - * If this was the first round, resize array and repeat for actual - * insertions; otherwise, merge and return. 
- */ - if (!insert) { - while (type->cnt + nr_new > type->max) - if (memblock_double_array(type, obase, size) < 0) - return -ENOMEM; - insert = true; - goto repeat; - } else { - memblock_merge_regions(type, start_rgn, end_rgn); - return 0; - } -} - -/** - * memblock_add_node - add new memblock region within a NUMA node - * @base: base address of the new region - * @size: size of the new region - * @nid: nid of the new region - * @flags: flags of the new region - * - * Add new memblock region [@base, @base + @size) to the "memory" - * type. See memblock_add_range() description for mode details - * - * Return: - * 0 on success, -errno on failure. - */ -int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, - int nid, enum memblock_flags flags) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, - &base, &end, nid, flags, (void *)_RET_IP_); - - return memblock_add_range(&memblock.memory, base, size, nid, flags); -} - -/** - * memblock_add - add new memblock region - * @base: base address of the new region - * @size: size of the new region - * - * Add new memblock region [@base, @base + @size) to the "memory" - * type. See memblock_add_range() description for mode details - * - * Return: - * 0 on success, -errno on failure. 
- */ -/* IAMROOT20_20240302 - * memblock.memory에 base부터 end까지 region을 등록 - */ -int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); - - return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); -} - -/** - * memblock_isolate_range - isolate given range into disjoint memblocks - * @type: memblock type to isolate range for - * @base: base of range to isolate - * @size: size of range to isolate - * @start_rgn: out parameter for the start of isolated region - * @end_rgn: out parameter for the end of isolated region - * - * Walk @type and ensure that regions don't cross the boundaries defined by - * [@base, @base + @size). Crossing regions are split at the boundaries, - * which may create at most two more regions. The index of the first - * region inside the range is returned in *@start_rgn and end in *@end_rgn. - * - * Return: - * 0 on success, -errno on failure. - */ -static int __init_memblock memblock_isolate_range(struct memblock_type *type, - phys_addr_t base, phys_addr_t size, - int *start_rgn, int *end_rgn) -{ - phys_addr_t end = base + memblock_cap_size(base, &size); - int idx; - struct memblock_region *rgn; - - *start_rgn = *end_rgn = 0; - - if (!size) - return 0; - - /* we'll create at most two more regions */ - while (type->cnt + 2 > type->max) - if (memblock_double_array(type, base, size) < 0) - return -ENOMEM; - - for_each_memblock_type(idx, type, rgn) { - phys_addr_t rbase = rgn->base; - phys_addr_t rend = rbase + rgn->size; - - if (rbase >= end) - break; - if (rend <= base) - continue; - - if (rbase < base) { - /* - * @rgn intersects from below. Split and continue - * to process the next region - the new top half. 
- */ - rgn->base = base; - rgn->size -= base - rbase; - type->total_size -= base - rbase; - memblock_insert_region(type, idx, rbase, base - rbase, - memblock_get_region_node(rgn), - rgn->flags); - } else if (rend > end) { - /* - * @rgn intersects from above. Split and redo the - * current region - the new bottom half. - */ - rgn->base = end; - rgn->size -= end - rbase; - type->total_size -= end - rbase; - memblock_insert_region(type, idx--, rbase, end - rbase, - memblock_get_region_node(rgn), - rgn->flags); - } else { - /* @rgn is fully contained, record it */ - if (!*end_rgn) - *start_rgn = idx; - *end_rgn = idx + 1; - } - } - - return 0; -} - -static int __init_memblock memblock_remove_range(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) -{ - int start_rgn, end_rgn; - int i, ret; - - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); - if (ret) - return ret; - - for (i = end_rgn - 1; i >= start_rgn; i--) - memblock_remove_region(type, i); - return 0; -} - -int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); - - return memblock_remove_range(&memblock.memory, base, size); -} - -/** - * memblock_free - free boot memory allocation - * @ptr: starting address of the boot memory allocation - * @size: size of the boot memory block in bytes - * - * Free boot memory block previously allocated by memblock_alloc_xx() API. - * The freeing memory will not be released to the buddy allocator. - */ -void __init_memblock memblock_free(void *ptr, size_t size) -{ - if (ptr) - memblock_phys_free(__pa(ptr), size); -} - -/** - * memblock_phys_free - free boot memory block - * @base: phys starting address of the boot memory block - * @size: size of the boot memory block in bytes - * - * Free boot memory block previously allocated by memblock_phys_alloc_xx() API. 
- * The freeing memory will not be released to the buddy allocator. - */ -int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); - - kmemleak_free_part_phys(base, size); - return memblock_remove_range(&memblock.reserved, base, size); -} - -/* IAMROOT20_20240302 - * memblock.reserved에 base부터 end까지 region을 등록 - */ -/* IAMROOT20_END 20240302 */ -int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); - - return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); -} - -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t end = base + size - 1; - - memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, - &base, &end, (void *)_RET_IP_); - - return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0); -} -#endif - -/** - * memblock_setclr_flag - set or clear flag for a memory region - * @base: base address of the region - * @size: size of the region - * @set: set or clear the flag - * @flag: the flag to update - * - * This function isolates region [@base, @base + @size), and sets/clears flag - * - * Return: 0 on success, -errno on failure. 
- */ -static int __init_memblock memblock_setclr_flag(phys_addr_t base, - phys_addr_t size, int set, int flag) -{ - struct memblock_type *type = &memblock.memory; - int i, ret, start_rgn, end_rgn; - - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); - if (ret) - return ret; - - for (i = start_rgn; i < end_rgn; i++) { - struct memblock_region *r = &type->regions[i]; - - if (set) - r->flags |= flag; - else - r->flags &= ~flag; - } - - memblock_merge_regions(type, start_rgn, end_rgn); - return 0; -} - -/** - * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. - * @base: the base phys addr of the region - * @size: the size of the region - * - * Return: 0 on success, -errno on failure. - */ -int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) -{ - return memblock_setclr_flag(base, size, 1, MEMBLOCK_HOTPLUG); -} - -/** - * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. - * @base: the base phys addr of the region - * @size: the size of the region - * - * Return: 0 on success, -errno on failure. - */ -int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) -{ - return memblock_setclr_flag(base, size, 0, MEMBLOCK_HOTPLUG); -} - -/** - * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR. - * @base: the base phys addr of the region - * @size: the size of the region - * - * Return: 0 on success, -errno on failure. - */ -int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) -{ - if (!mirrored_kernelcore) - return 0; - - system_has_some_mirror = true; - - return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR); -} - -/** - * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP. - * @base: the base phys addr of the region - * @size: the size of the region - * - * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the - * direct mapping of the physical memory. 
These regions will still be - * covered by the memory map. The struct page representing NOMAP memory - * frames in the memory map will be PageReserved() - * - * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from - * memblock, the caller must inform kmemleak to ignore that memory - * - * Return: 0 on success, -errno on failure. - */ -int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) -{ - return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); -} - -/** - * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region. - * @base: the base phys addr of the region - * @size: the size of the region - * - * Return: 0 on success, -errno on failure. - */ -int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) -{ - return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP); -} - -static bool should_skip_region(struct memblock_type *type, - struct memblock_region *m, - int nid, int flags) -{ - int m_nid = memblock_get_region_node(m); - - /* we never skip regions when iterating memblock.reserved or physmem */ - if (type != memblock_memory) - return false; - - /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) - return true; - - /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && - !(flags & MEMBLOCK_HOTPLUG)) - return true; - - /* if we want mirror memory skip non-mirror memory regions */ - if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) - return true; - - /* skip nomap memory unless we were asked for it explicitly */ - if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) - return true; - - /* skip driver-managed memory unless we were asked for it explicitly */ - if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) - return true; - - return false; -} - -/** - * __next_mem_range - next function for for_each_free_mem_range() etc. 
- * @idx: pointer to u64 loop variable - * @nid: node selector, %NUMA_NO_NODE for all nodes - * @flags: pick from blocks based on memory attributes - * @type_a: pointer to memblock_type from where the range is taken - * @type_b: pointer to memblock_type which excludes memory from being taken - * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL - * @out_nid: ptr to int for nid of the range, can be %NULL - * - * Find the first area from *@idx which matches @nid, fill the out - * parameters, and update *@idx for the next iteration. The lower 32bit of - * *@idx contains index into type_a and the upper 32bit indexes the - * areas before each region in type_b. For example, if type_b regions - * look like the following, - * - * 0:[0-16), 1:[32-48), 2:[128-130) - * - * The upper 32bit indexes the following regions. - * - * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) - * - * As both region arrays are sorted, the function advances the two indices - * in lockstep and returns each intersection. - */ -void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, - struct memblock_type *type_a, - struct memblock_type *type_b, phys_addr_t *out_start, - phys_addr_t *out_end, int *out_nid) -{ - int idx_a = *idx & 0xffffffff; - int idx_b = *idx >> 32; - - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - - for (; idx_a < type_a->cnt; idx_a++) { - struct memblock_region *m = &type_a->regions[idx_a]; - - phys_addr_t m_start = m->base; - phys_addr_t m_end = m->base + m->size; - int m_nid = memblock_get_region_node(m); - - if (should_skip_region(type_a, m, nid, flags)) - continue; - - if (!type_b) { - if (out_start) - *out_start = m_start; - if (out_end) - *out_end = m_end; - if (out_nid) - *out_nid = m_nid; - idx_a++; - *idx = (u32)idx_a | (u64)idx_b << 32; - return; - } - - /* scan areas before each reservation */ - for (; idx_b < type_b->cnt + 1; idx_b++) { - struct memblock_region *r; - phys_addr_t r_start; - phys_addr_t r_end; - - r = &type_b->regions[idx_b]; - r_start = idx_b ? r[-1].base + r[-1].size : 0; - r_end = idx_b < type_b->cnt ? - r->base : PHYS_ADDR_MAX; - - /* - * if idx_b advanced past idx_a, - * break out to advance idx_a - */ - if (r_start >= m_end) - break; - /* if the two regions intersect, we're done */ - if (m_start < r_end) { - if (out_start) - *out_start = - max(m_start, r_start); - if (out_end) - *out_end = min(m_end, r_end); - if (out_nid) - *out_nid = m_nid; - /* - * The region which ends first is - * advanced for the next iteration. 
- */ - if (m_end <= r_end) - idx_a++; - else - idx_b++; - *idx = (u32)idx_a | (u64)idx_b << 32; - return; - } - } - } - - /* signal end of iteration */ - *idx = ULLONG_MAX; -} - -/** - * __next_mem_range_rev - generic next function for for_each_*_range_rev() - * - * @idx: pointer to u64 loop variable - * @nid: node selector, %NUMA_NO_NODE for all nodes - * @flags: pick from blocks based on memory attributes - * @type_a: pointer to memblock_type from where the range is taken - * @type_b: pointer to memblock_type which excludes memory from being taken - * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL - * @out_nid: ptr to int for nid of the range, can be %NULL - * - * Finds the next range from type_a which is not marked as unsuitable - * in type_b. - * - * Reverse of __next_mem_range(). - */ -void __init_memblock __next_mem_range_rev(u64 *idx, int nid, - enum memblock_flags flags, - struct memblock_type *type_a, - struct memblock_type *type_b, - phys_addr_t *out_start, - phys_addr_t *out_end, int *out_nid) -{ - int idx_a = *idx & 0xffffffff; - int idx_b = *idx >> 32; - - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - - if (*idx == (u64)ULLONG_MAX) { - idx_a = type_a->cnt - 1; - if (type_b != NULL) - idx_b = type_b->cnt; - else - idx_b = 0; - } - - for (; idx_a >= 0; idx_a--) { - struct memblock_region *m = &type_a->regions[idx_a]; - - phys_addr_t m_start = m->base; - phys_addr_t m_end = m->base + m->size; - int m_nid = memblock_get_region_node(m); - - if (should_skip_region(type_a, m, nid, flags)) - continue; - - if (!type_b) { - if (out_start) - *out_start = m_start; - if (out_end) - *out_end = m_end; - if (out_nid) - *out_nid = m_nid; - idx_a--; - *idx = (u32)idx_a | (u64)idx_b << 32; - return; - } - - /* scan areas before each reservation */ - for (; idx_b >= 0; idx_b--) { - struct memblock_region *r; - phys_addr_t r_start; - phys_addr_t r_end; - - r = &type_b->regions[idx_b]; - r_start = idx_b ? r[-1].base + r[-1].size : 0; - r_end = idx_b < type_b->cnt ? - r->base : PHYS_ADDR_MAX; - /* - * if idx_b advanced past idx_a, - * break out to advance idx_a - */ - - if (r_end <= m_start) - break; - /* if the two regions intersect, we're done */ - if (m_end > r_start) { - if (out_start) - *out_start = max(m_start, r_start); - if (out_end) - *out_end = min(m_end, r_end); - if (out_nid) - *out_nid = m_nid; - if (m_start >= r_start) - idx_a--; - else - idx_b--; - *idx = (u32)idx_a | (u64)idx_b << 32; - return; - } - } - } - /* signal end of iteration */ - *idx = ULLONG_MAX; -} - -/* - * Common iterator interface used to define for_each_mem_pfn_range(). 
- */ -void __init_memblock __next_mem_pfn_range(int *idx, int nid, - unsigned long *out_start_pfn, - unsigned long *out_end_pfn, int *out_nid) -{ - struct memblock_type *type = &memblock.memory; - struct memblock_region *r; - int r_nid; - - while (++*idx < type->cnt) { - r = &type->regions[*idx]; - r_nid = memblock_get_region_node(r); - - if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) - continue; - if (nid == MAX_NUMNODES || nid == r_nid) - break; - } - if (*idx >= type->cnt) { - *idx = -1; - return; - } - - if (out_start_pfn) - *out_start_pfn = PFN_UP(r->base); - if (out_end_pfn) - *out_end_pfn = PFN_DOWN(r->base + r->size); - if (out_nid) - *out_nid = r_nid; -} - -/** - * memblock_set_node - set node ID on memblock regions - * @base: base of area to set node ID for - * @size: size of area to set node ID for - * @type: memblock type to set node ID for - * @nid: node ID to set - * - * Set the nid of memblock @type regions in [@base, @base + @size) to @nid. - * Regions which cross the area boundaries are split as necessary. - * - * Return: - * 0 on success, -errno on failure. 
- */ -int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, - struct memblock_type *type, int nid) -{ -#ifdef CONFIG_NUMA - int start_rgn, end_rgn; - int i, ret; - - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); - if (ret) - return ret; - - for (i = start_rgn; i < end_rgn; i++) - memblock_set_region_node(&type->regions[i], nid); - - memblock_merge_regions(type, start_rgn, end_rgn); -#endif - return 0; -} - -#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -/** - * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() - * - * @idx: pointer to u64 loop variable - * @zone: zone in which all of the memory blocks reside - * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL - * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL - * - * This function is meant to be a zone/pfn specific wrapper for the - * for_each_mem_range type iterators. Specifically they are used in the - * deferred memory init routines and as such we were duplicating much of - * this logic throughout the code. So instead of having it in multiple - * locations it seemed like it would make more sense to centralize this to - * one new iterator that does everything they need. - */ -void __init_memblock -__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, - unsigned long *out_spfn, unsigned long *out_epfn) -{ - int zone_nid = zone_to_nid(zone); - phys_addr_t spa, epa; - - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, - &memblock.memory, &memblock.reserved, - &spa, &epa, NULL); - - while (*idx != U64_MAX) { - unsigned long epfn = PFN_DOWN(epa); - unsigned long spfn = PFN_UP(spa); - - /* - * Verify the end is at least past the start of the zone and - * that we have at least one PFN to initialize. 
- */ - if (zone->zone_start_pfn < epfn && spfn < epfn) { - /* if we went too far just stop searching */ - if (zone_end_pfn(zone) <= spfn) { - *idx = U64_MAX; - break; - } - - if (out_spfn) - *out_spfn = max(zone->zone_start_pfn, spfn); - if (out_epfn) - *out_epfn = min(zone_end_pfn(zone), epfn); - - return; - } - - __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, - &memblock.memory, &memblock.reserved, - &spa, &epa, NULL); - } - - /* signal end of iteration */ - if (out_spfn) - *out_spfn = ULONG_MAX; - if (out_epfn) - *out_epfn = 0; -} - -#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ - -/** - * memblock_alloc_range_nid - allocate boot memory block - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @start: the lower bound of the memory region to allocate (phys address) - * @end: the upper bound of the memory region to allocate (phys address) - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * @exact_nid: control the allocation fall back to other nodes - * - * The allocation is performed from memory region limited by - * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE. - * - * If the specified node can not hold the requested memory and @exact_nid - * is false, the allocation falls back to any node in the system. - * - * For systems with memory mirroring, the allocation is attempted first - * from the regions with mirroring enabled and then retried from any - * memory region. - * - * In addition, function using kmemleak_alloc_phys for allocated boot - * memory block, it is never reported as leaks. - * - * Return: - * Physical address of allocated memory block on success, %0 on failure. 
- */ -phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, - phys_addr_t align, phys_addr_t start, - phys_addr_t end, int nid, - bool exact_nid) -{ - enum memblock_flags flags = choose_memblock_flags(); - phys_addr_t found; - - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - - if (!align) { - /* Can't use WARNs this early in boot on powerpc */ - dump_stack(); - align = SMP_CACHE_BYTES; - } - -again: - found = memblock_find_in_range_node(size, align, start, end, nid, - flags); - if (found && !memblock_reserve(found, size)) - goto done; - - if (nid != NUMA_NO_NODE && !exact_nid) { - found = memblock_find_in_range_node(size, align, start, - end, NUMA_NO_NODE, - flags); - if (found && !memblock_reserve(found, size)) - goto done; - } - - if (flags & MEMBLOCK_MIRROR) { - flags &= ~MEMBLOCK_MIRROR; - pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", - &size); - goto again; - } - - return 0; - -done: - /* - * Skip kmemleak for those places like kasan_init() and - * early_pgtable_alloc() due to high volume. - */ - if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) - /* - * Memblock allocated blocks are never reported as - * leaks. This is because many of these blocks are - * only referred via the physical address which is - * not looked up by kmemleak. - */ - kmemleak_alloc_phys(found, size, 0); - - return found; -} - -/** - * memblock_phys_alloc_range - allocate a memory block inside specified range - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @start: the lower bound of the memory region to allocate (physical address) - * @end: the upper bound of the memory region to allocate (physical address) - * - * Allocate @size bytes in the between @start and @end. - * - * Return: physical address of the allocated memory block on success, - * %0 on failure. 
- */ -phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, - phys_addr_t align, - phys_addr_t start, - phys_addr_t end) -{ - memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n", - __func__, (u64)size, (u64)align, &start, &end, - (void *)_RET_IP_); - return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE, - false); -} - -/** - * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * - * Allocates memory block from the specified NUMA node. If the node - * has no available memory, attempts to allocated from any node in the - * system. - * - * Return: physical address of the allocated memory block on success, - * %0 on failure. - */ -phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) -{ - return memblock_alloc_range_nid(size, align, 0, - MEMBLOCK_ALLOC_ACCESSIBLE, nid, false); -} - -/** - * memblock_alloc_internal - allocate boot memory block - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @min_addr: the lower bound of the memory region to allocate (phys address) - * @max_addr: the upper bound of the memory region to allocate (phys address) - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * @exact_nid: control the allocation fall back to other nodes - * - * Allocates memory block using memblock_alloc_range_nid() and - * converts the returned physical address to virtual. - * - * The @min_addr limit is dropped if it can not be satisfied and the allocation - * will fall back to memory below @min_addr. Other constraints, such - * as node and mirrored memory will be handled again in - * memblock_alloc_range_nid(). - * - * Return: - * Virtual address of allocated memory block on success, NULL on failure. 
- */ -static void * __init memblock_alloc_internal( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid, bool exact_nid) -{ - phys_addr_t alloc; - - /* - * Detect any accidental use of these APIs after slab is ready, as at - * this moment memblock may be deinitialized already and its - * internal data may be destroyed (after execution of memblock_free_all) - */ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, nid); - - if (max_addr > memblock.current_limit) - max_addr = memblock.current_limit; - - alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid, - exact_nid); - - /* retry allocation without lower limit */ - if (!alloc && min_addr) - alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid, - exact_nid); - - if (!alloc) - return NULL; - - return phys_to_virt(alloc); -} - -/** - * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node - * without zeroing memory - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @min_addr: the lower bound of the memory region from where the allocation - * is preferred (phys address) - * @max_addr: the upper bound of the memory region from where the allocation - * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to - * allocate only from memory limited by memblock.current_limit value - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * - * Public function, provides additional debug information (including caller - * info), if enabled. Does not zero allocated memory. - * - * Return: - * Virtual address of allocated memory block on success, NULL on failure. 
- */ -void * __init memblock_alloc_exact_nid_raw( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid) -{ - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", - __func__, (u64)size, (u64)align, nid, &min_addr, - &max_addr, (void *)_RET_IP_); - - return memblock_alloc_internal(size, align, min_addr, max_addr, nid, - true); -} - -/** - * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing - * memory and without panicking - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @min_addr: the lower bound of the memory region from where the allocation - * is preferred (phys address) - * @max_addr: the upper bound of the memory region from where the allocation - * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to - * allocate only from memory limited by memblock.current_limit value - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * - * Public function, provides additional debug information (including caller - * info), if enabled. Does not zero allocated memory, does not panic if request - * cannot be satisfied. - * - * Return: - * Virtual address of allocated memory block on success, NULL on failure. 
- */ -void * __init memblock_alloc_try_nid_raw( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid) -{ - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", - __func__, (u64)size, (u64)align, nid, &min_addr, - &max_addr, (void *)_RET_IP_); - - return memblock_alloc_internal(size, align, min_addr, max_addr, nid, - false); -} - -/** - * memblock_alloc_try_nid - allocate boot memory block - * @size: size of memory block to be allocated in bytes - * @align: alignment of the region and block's size - * @min_addr: the lower bound of the memory region from where the allocation - * is preferred (phys address) - * @max_addr: the upper bound of the memory region from where the allocation - * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to - * allocate only from memory limited by memblock.current_limit value - * @nid: nid of the free area to find, %NUMA_NO_NODE for any node - * - * Public function, provides additional debug information (including caller - * info), if enabled. This function zeroes the allocated memory. - * - * Return: - * Virtual address of allocated memory block on success, NULL on failure. - */ -void * __init memblock_alloc_try_nid( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, - int nid) -{ - void *ptr; - - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", - __func__, (u64)size, (u64)align, nid, &min_addr, - &max_addr, (void *)_RET_IP_); - ptr = memblock_alloc_internal(size, align, - min_addr, max_addr, nid, false); - if (ptr) - memset(ptr, 0, size); - - return ptr; -} - -/** - * memblock_free_late - free pages directly to buddy allocator - * @base: phys starting address of the boot memory block - * @size: size of the boot memory block in bytes - * - * This is only useful when the memblock allocator has already been torn - * down, but we are still initializing the system. 
Pages are released directly - * to the buddy allocator. - */ -void __init memblock_free_late(phys_addr_t base, phys_addr_t size) -{ - phys_addr_t cursor, end; - - end = base + size - 1; - memblock_dbg("%s: [%pa-%pa] %pS\n", - __func__, &base, &end, (void *)_RET_IP_); - kmemleak_free_part_phys(base, size); - cursor = PFN_UP(base); - end = PFN_DOWN(base + size); - - for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), cursor, 0); - totalram_pages_inc(); - } -} - -/* - * Remaining API functions - */ - -phys_addr_t __init_memblock memblock_phys_mem_size(void) -{ - return memblock.memory.total_size; -} - -phys_addr_t __init_memblock memblock_reserved_size(void) -{ - return memblock.reserved.total_size; -} - -/* lowest address */ -phys_addr_t __init_memblock memblock_start_of_DRAM(void) -{ - return memblock.memory.regions[0].base; -} - -phys_addr_t __init_memblock memblock_end_of_DRAM(void) -{ - int idx = memblock.memory.cnt - 1; - - return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); -} - -static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) -{ - phys_addr_t max_addr = PHYS_ADDR_MAX; - struct memblock_region *r; - - /* - * translate the memory @limit size into the max address within one of - * the memory memblock regions, if the @limit exceeds the total size - * of those regions, max_addr will keep original value PHYS_ADDR_MAX - */ - for_each_mem_region(r) { - if (limit <= r->size) { - max_addr = r->base + limit; - break; - } - limit -= r->size; - } - - return max_addr; -} - -void __init memblock_enforce_memory_limit(phys_addr_t limit) -{ - phys_addr_t max_addr; - - if (!limit) - return; - - max_addr = __find_max_addr(limit); - - /* @limit exceeds the total size of the memory, do nothing */ - if (max_addr == PHYS_ADDR_MAX) - return; - - /* truncate both memory and reserved regions */ - memblock_remove_range(&memblock.memory, max_addr, - PHYS_ADDR_MAX); - memblock_remove_range(&memblock.reserved, 
max_addr, - PHYS_ADDR_MAX); -} - -void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) -{ - int start_rgn, end_rgn; - int i, ret; - - if (!size) - return; - - if (!memblock_memory->total_size) { - pr_warn("%s: No memory registered yet\n", __func__); - return; - } - - ret = memblock_isolate_range(&memblock.memory, base, size, - &start_rgn, &end_rgn); - if (ret) - return; - - /* remove all the MAP regions */ - for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) - if (!memblock_is_nomap(&memblock.memory.regions[i])) - memblock_remove_region(&memblock.memory, i); - - for (i = start_rgn - 1; i >= 0; i--) - if (!memblock_is_nomap(&memblock.memory.regions[i])) - memblock_remove_region(&memblock.memory, i); - - /* truncate the reserved regions */ - memblock_remove_range(&memblock.reserved, 0, base); - memblock_remove_range(&memblock.reserved, - base + size, PHYS_ADDR_MAX); -} - -void __init memblock_mem_limit_remove_map(phys_addr_t limit) -{ - phys_addr_t max_addr; - - if (!limit) - return; - - max_addr = __find_max_addr(limit); - - /* @limit exceeds the total size of the memory, do nothing */ - if (max_addr == PHYS_ADDR_MAX) - return; - - memblock_cap_memory_range(0, max_addr); -} - -static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) -{ - unsigned int left = 0, right = type->cnt; - - do { - unsigned int mid = (right + left) / 2; - - if (addr < type->regions[mid].base) - right = mid; - else if (addr >= (type->regions[mid].base + - type->regions[mid].size)) - left = mid + 1; - else - return mid; - } while (left < right); - return -1; -} - -bool __init_memblock memblock_is_reserved(phys_addr_t addr) -{ - return memblock_search(&memblock.reserved, addr) != -1; -} - -bool __init_memblock memblock_is_memory(phys_addr_t addr) -{ - return memblock_search(&memblock.memory, addr) != -1; -} - -bool __init_memblock memblock_is_map_memory(phys_addr_t addr) -{ - int i = memblock_search(&memblock.memory, addr); - - if (i == -1) 
- return false; - return !memblock_is_nomap(&memblock.memory.regions[i]); -} - -int __init_memblock memblock_search_pfn_nid(unsigned long pfn, - unsigned long *start_pfn, unsigned long *end_pfn) -{ - struct memblock_type *type = &memblock.memory; - int mid = memblock_search(type, PFN_PHYS(pfn)); - - if (mid == -1) - return -1; - - *start_pfn = PFN_DOWN(type->regions[mid].base); - *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); - - return memblock_get_region_node(&type->regions[mid]); -} - -/** - * memblock_is_region_memory - check if a region is a subset of memory - * @base: base of region to check - * @size: size of region to check - * - * Check if the region [@base, @base + @size) is a subset of a memory block. - * - * Return: - * 0 if false, non-zero if true - */ -bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) -{ - int idx = memblock_search(&memblock.memory, base); - phys_addr_t end = base + memblock_cap_size(base, &size); - - if (idx == -1) - return false; - return (memblock.memory.regions[idx].base + - memblock.memory.regions[idx].size) >= end; -} - -/** - * memblock_is_region_reserved - check if a region intersects reserved memory - * @base: base of region to check - * @size: size of region to check - * - * Check if the region [@base, @base + @size) intersects a reserved - * memory block. - * - * Return: - * True if they intersect, false if not. 
- */ -bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) -{ - return memblock_overlaps_region(&memblock.reserved, base, size); -} - -void __init_memblock memblock_trim_memory(phys_addr_t align) -{ - phys_addr_t start, end, orig_start, orig_end; - struct memblock_region *r; - - for_each_mem_region(r) { - orig_start = r->base; - orig_end = r->base + r->size; - start = round_up(orig_start, align); - end = round_down(orig_end, align); - - if (start == orig_start && end == orig_end) - continue; - - if (start < end) { - r->base = start; - r->size = end - start; - } else { - memblock_remove_region(&memblock.memory, - r - memblock.memory.regions); - r--; - } - } -} - -void __init_memblock memblock_set_current_limit(phys_addr_t limit) -{ - memblock.current_limit = limit; -} - -phys_addr_t __init_memblock memblock_get_current_limit(void) -{ - return memblock.current_limit; -} - -static void __init_memblock memblock_dump(struct memblock_type *type) -{ - phys_addr_t base, end, size; - enum memblock_flags flags; - int idx; - struct memblock_region *rgn; - - pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt); - - for_each_memblock_type(idx, type, rgn) { - char nid_buf[32] = ""; - - base = rgn->base; - size = rgn->size; - end = base + size - 1; - flags = rgn->flags; -#ifdef CONFIG_NUMA - if (memblock_get_region_node(rgn) != MAX_NUMNODES) - snprintf(nid_buf, sizeof(nid_buf), " on node %d", - memblock_get_region_node(rgn)); -#endif - pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n", - type->name, idx, &base, &end, &size, nid_buf, flags); - } -} - -static void __init_memblock __memblock_dump_all(void) -{ - pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory size = %pa reserved size = %pa\n", - &memblock.memory.total_size, - &memblock.reserved.total_size); - - memblock_dump(&memblock.memory); - memblock_dump(&memblock.reserved); -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - memblock_dump(&physmem); -#endif -} - -void __init_memblock 
memblock_dump_all(void) -{ - if (memblock_debug) - __memblock_dump_all(); -} - -void __init memblock_allow_resize(void) -{ - memblock_can_resize = 1; -} - -static int __init early_memblock(char *p) -{ - if (p && strstr(p, "debug")) - memblock_debug = 1; - return 0; -} -early_param("memblock", early_memblock); - -static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) -{ - struct page *start_pg, *end_pg; - phys_addr_t pg, pgend; - - /* - * Convert start_pfn/end_pfn to a struct page pointer. - */ - start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn - 1) + 1; - - /* - * Convert to physical addresses, and round start upwards and end - * downwards. - */ - pg = PAGE_ALIGN(__pa(start_pg)); - pgend = __pa(end_pg) & PAGE_MASK; - - /* - * If there are free pages between these, free the section of the - * memmap array. - */ - if (pg < pgend) - memblock_phys_free(pg, pgend - pg); -} - -/* - * The mem_map array can get very big. Free the unused area of the memory map. - */ -static void __init free_unused_memmap(void) -{ - unsigned long start, end, prev_end = 0; - int i; - - if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) || - IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) - return; - - /* - * This relies on each bank being in address order. - * The banks are sorted previously in bootmem_init(). - */ - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { -#ifdef CONFIG_SPARSEMEM - /* - * Take care not to free memmap entries that don't exist - * due to SPARSEMEM sections which aren't present. - */ - start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#endif - /* - * Align down here since many operations in VM subsystem - * presume that there are no holes in the memory map inside - * a pageblock - */ - start = pageblock_start_pfn(start); - - /* - * If we had a previous bank, and there is a space - * between the current bank and the previous, free it. 
- */ - if (prev_end && prev_end < start) - free_memmap(prev_end, start); - - /* - * Align up here since many operations in VM subsystem - * presume that there are no holes in the memory map inside - * a pageblock - */ - prev_end = pageblock_align(end); - } - -#ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { - prev_end = pageblock_align(end); - free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); - } -#endif -} - -static void __init __free_pages_memory(unsigned long start, unsigned long end) -{ - int order; - - while (start < end) { - /* - * Free the pages in the largest chunks alignment allows. - * - * __ffs() behaviour is undefined for 0. start == 0 is - * MAX_ORDER-aligned, set order to MAX_ORDER for the case. - */ - if (start) - order = min_t(int, MAX_ORDER, __ffs(start)); - else - order = MAX_ORDER; - - while (start + (1UL << order) > end) - order--; - - memblock_free_pages(pfn_to_page(start), start, order); - - start += (1UL << order); - } -} - -static unsigned long __init __free_memory_core(phys_addr_t start, - phys_addr_t end) -{ - unsigned long start_pfn = PFN_UP(start); - unsigned long end_pfn = min_t(unsigned long, - PFN_DOWN(end), max_low_pfn); - - if (start_pfn >= end_pfn) - return 0; - - __free_pages_memory(start_pfn, end_pfn); - - return end_pfn - start_pfn; -} - -static void __init memmap_init_reserved_pages(void) -{ - struct memblock_region *region; - phys_addr_t start, end; - u64 i; - - /* initialize struct pages for the reserved regions */ - for_each_reserved_mem_range(i, &start, &end) - reserve_bootmem_region(start, end); - - /* and also treat struct pages for the NOMAP regions as PageReserved */ - for_each_mem_region(region) { - if (memblock_is_nomap(region)) { - start = region->base; - end = start + region->size; - reserve_bootmem_region(start, end); - } - } -} - -static unsigned long __init free_low_memory_core_early(void) -{ - unsigned long count = 0; - phys_addr_t start, end; - u64 i; - - 
memblock_clear_hotplug(0, -1); - - memmap_init_reserved_pages(); - - /* - * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id - * because in some case like Node0 doesn't have RAM installed - * low ram will be on Node1 - */ - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, - NULL) - count += __free_memory_core(start, end); - - return count; -} - -static int reset_managed_pages_done __initdata; - -void reset_node_managed_pages(pg_data_t *pgdat) -{ - struct zone *z; - - for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) - atomic_long_set(&z->managed_pages, 0); -} - -void __init reset_all_zones_managed_pages(void) -{ - struct pglist_data *pgdat; - - if (reset_managed_pages_done) - return; - - for_each_online_pgdat(pgdat) - reset_node_managed_pages(pgdat); - - reset_managed_pages_done = 1; -} - -/** - * memblock_free_all - release free pages to the buddy allocator - */ -void __init memblock_free_all(void) -{ - unsigned long pages; - - free_unused_memmap(); - reset_all_zones_managed_pages(); - - pages = free_low_memory_core_early(); - totalram_pages_add(pages); -} - -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) - -static int memblock_debug_show(struct seq_file *m, void *private) -{ - struct memblock_type *type = m->private; - struct memblock_region *reg; - int i; - phys_addr_t end; - - for (i = 0; i < type->cnt; i++) { - reg = &type->regions[i]; - end = reg->base + reg->size - 1; - - seq_printf(m, "%4d: ", i); - seq_printf(m, "%pa..%pa\n", &reg->base, &end); - } - return 0; -} -DEFINE_SHOW_ATTRIBUTE(memblock_debug); - -static int __init memblock_init_debugfs(void) -{ - struct dentry *root = debugfs_create_dir("memblock", NULL); - - debugfs_create_file("memory", 0444, root, - &memblock.memory, &memblock_debug_fops); - debugfs_create_file("reserved", 0444, root, - &memblock.reserved, &memblock_debug_fops); -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - debugfs_create_file("physmem", 0444, root, &physmem, - 
&memblock_debug_fops); -#endif - - return 0; -} -__initcall(memblock_init_debugfs); - -#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 05e138bbda6f7..3ca934603a53a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1315,7 +1315,8 @@ void __init early_init_dt_scan_nodes(void) /* Setup memory, calling early_init_dt_add_memory_arch */ early_init_dt_scan_memory(); - + + /* IAMROOT20_20240316 START */ /* Handle linux,usable-memory-range property */ early_init_dt_check_for_usable_mem_range(); } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index d9c822bbffb8d..36114c6df0269 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -67,6 +67,11 @@ static void jump_label_swap(void *a, void *b, int size) struct jump_entry *jeb = b; struct jump_entry tmp = *jea; + /* IAMROOT20 20240316 + * jump_entry_code/target/key() 함수에서 + * '주소 + 값'으로 값을 비교 + * -> '주소'가 변한 만큼(delta) '값'에서 보상을 해줘야 한다 + */ jea->code = jeb->code - delta; jea->target = jeb->target - delta; jea->key = jeb->key - delta; From 786e106d37a453ec6a9f0f4e1d7c3ed0cebbccec Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 23 Mar 2024 14:13:05 +0900 Subject: [PATCH 046/104] IAMROOT20 20240314 - Add comments to jump_label_init, parse_early_param --- init/main.c | 4 ++++ kernel/jump_label.c | 16 ++++++++++++++++ kernel/params.c | 11 +++++++++++ 3 files changed, 31 insertions(+) diff --git a/init/main.c b/init/main.c index f1086425b1cbe..a10a5b6e178a9 100644 --- a/init/main.c +++ b/init/main.c @@ -770,6 +770,10 @@ void __init parse_early_param(void) return; /* All fall through to do_early_param. 
*/ + /* IAMROOT20 20240316 + * boot_command_line에는 dt에서 가져온 bootargs의 문자열이 저장되어 있음 + * ex) boot_command_line = "console=ttyS0,115200n8 earlyprintk" + */ strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_early_options(tmp_cmdline); done = 1; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 36114c6df0269..00115a50070c7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -512,17 +512,33 @@ void __init jump_label_init(void) jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); + /* IAMROOT20 20240316 + * 모든 jump entry 순회 + */ for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; bool in_init; /* rewrite NOPs */ + /* IAMROOT20 20240316 + * nop 엔트리일 경우 nop 명령어로 교환 + * arm64의 경우 컴파일 타임에 nop으로 생성되어 있으므로 생략됨 + */ if (jump_label_type(iter) == JUMP_LABEL_NOP) arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + /* IAMROOT20 20240316 + * entry의 code가 init 섹션에 있는지 확인 + * init 섹션에 있을 경우 나중에 init 섹션 내용은 삭제되므로 + * init 섹션안에 포함된 jump entry들은 업데이트 할 필요가 없음 + */ in_init = init_section_contains((void *)jump_entry_code(iter), 1); jump_entry_set_init(iter, in_init); + /* IAMROOT20 20240316 + * key의 type의 하위 2비트에 nop 엔트리인지 jump 엔트리인지 기록하고 + * key의 entries와 sorting되어 있는 첫번째 jump 엔트리만 연결 + */ iterk = jump_entry_key(iter); if (iterk == key) continue; diff --git a/kernel/params.c b/kernel/params.c index 6a7548979aa9a..b640cc3e611e4 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -151,6 +151,10 @@ static int parse_one(char *param, if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); + /* IAMROOT20 20240316 + * ex) do_early_param("console", "ttyS0,115200n8", "early options", NULL); + */ + /* IAMROOT20_END 20240316 */ return handle_unknown(param, val, doing, arg); } @@ -172,6 +176,9 @@ char *parse_args(const char *doing, char *param, *val, *err = NULL; /* Chew leading spaces */ + /* IAMROOT20 20240316 + * args 앞에 존재하는 space를 제거 + */ args = skip_spaces(args); if (*args) @@ -181,6 +188,10 @@ 
char *parse_args(const char *doing, int ret; int irq_was_disabled; + /* IAMROOT20 20240316 + * ex) args = "console=ttyS0,115200n8 earlyprintk" + * param = console, val = ttyS0,115200n8 + */ args = next_arg(args, &param, &val); /* Stop at -- */ if (!val && strcmp(param, "--") == 0) From eb4506ed0797d1148571aa195dd1c19ba47594de Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 23 Mar 2024 22:08:00 +0900 Subject: [PATCH 047/104] IAMROOT20 20240323 - Add comments Signed-off-by: Daero Lee --- arch/arm64/kernel/setup.c | 1 + drivers/tty/serial/earlycon.c | 40 ++++++++++++++++++++++++++++++++++- init/main.c | 4 ++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index df91916838db1..60a37bf245a9f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -351,6 +351,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); + /* IAMROOT20_20240323 END */ /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c index a5fbb6ed38aed..2b80465b0f3ff 100644 --- a/drivers/tty/serial/earlycon.c +++ b/drivers/tty/serial/earlycon.c @@ -36,6 +36,11 @@ static struct earlycon_device early_console_dev = { .con = &early_con, }; +/* IAMROOT20_20240323 + * boot command line 파라미터로 들어온 physical address를 + * FIX_EARLYCON_MEM_BASE 에 매핑한다. 
+ * FIX_EARLYCON_MEM_BASE virtual address + page offset을 반환한다 + */ static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size) { void __iomem *base; @@ -60,6 +65,11 @@ static void __init earlycon_init(struct earlycon_device *device, size_t len; /* scan backwards from end of string for first non-numeral */ + /* IAMROOT20_20240323 + * ex) name = "pl011" + * - earlycon->index = 011 + * - earlycon->name = "pl" + */ for (s = name + strlen(name); s > name && s[-1] >= '0' && s[-1] <= '9'; s--) @@ -96,6 +106,12 @@ static int __init parse_options(struct earlycon_device *device, char *options) int length; resource_size_t addr; + /* IAMROOT20_20240323 + * ex) options = "0x3f201000,115200n8" + * - iotype = UPIO_MEM + * - addr = 0x3f201000 + * - options = "115200n8" + */ if (uart_parse_earlycon(options, &port->iotype, &addr, &options)) return -EINVAL; @@ -147,6 +163,10 @@ static int __init register_earlycon(char *buf, const struct earlycon_id *match) spin_lock_init(&port->lock); if (!port->uartclk) port->uartclk = BASE_BAUD * 16; + /* IAMROOT20_20240323 + * port->mapbase를 FIX_EARLYCON_MEM_BASE에 매핑하고, + * virtual address를 다시 저장 + */ if (port->mapbase) port->membase = earlycon_map(port->mapbase, 64); @@ -192,6 +212,15 @@ int __init setup_earlycon(char *buf) return -EALREADY; again: + /* IAMROOT20_20240323 + * ex) rpi2 early console : "earlycon=pl011,0x3f201000,115200n8" + * OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); + * + * struct earlycon_id + * - name = "pl011" + * - compatible = "arm,pl011" + * - setup = pl011_early_console_setup + */ for (match = __earlycon_table; match < __earlycon_table_end; match++) { size_t len = strlen(match->name); @@ -201,7 +230,13 @@ int __init setup_earlycon(char *buf) /* prefer entries with empty compatible */ if (empty_compatible && *match->compatible) continue; - + /* IAMROOT20_20240323 + * ex) buf = "pl011,0x3f201000,115200n8" + * buf[len] -> ','를 가리킴 + * + * buf += len + 1; 이후에 + * buf = 
"0x3f201000,115200n8" + */ if (buf[len]) { if (buf[len] != ',') continue; @@ -227,6 +262,9 @@ int __init setup_earlycon(char *buf) bool earlycon_acpi_spcr_enable __initdata; /* early_param wrapper for setup_earlycon() */ +/* IAMROOT20_20240323 + * ex) buf = "ttyS0,115200n8" + */ static int __init param_setup_earlycon(char *buf) { int err; diff --git a/init/main.c b/init/main.c index a10a5b6e178a9..fd9863f53ec9e 100644 --- a/init/main.c +++ b/init/main.c @@ -736,6 +736,10 @@ noinline void __ref __noreturn rest_init(void) } /* Check for early params. */ +/* IAMROOT20_20240323 START + * ex) param = "console", val = "ttyS0,115200n8" + * unused = "early options", arg = null + */ static int __init do_early_param(char *param, char *val, const char *unused, void *arg) { From 7278f69170d9e279ffd1914d2c60d6b1bfef1d7f Mon Sep 17 00:00:00 2001 From: park-seong-su Date: Sat, 30 Mar 2024 22:07:30 +0900 Subject: [PATCH 048/104] IAMROOT20 20240330 - Add comments to arm64_memblock_init --- arch/arm64/include/asm/kernel-pgtable.h | 7 +++ arch/arm64/include/asm/mmu_context.h | 17 ++++++ arch/arm64/kernel/setup.c | 7 +++ arch/arm64/mm/init.c | 76 +++++++++++++++++++++++++ drivers/of/fdt.c | 49 +++++++++++++++- 5 files changed, 155 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index d03bbceaf5c22..4af95d1dec9b3 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -146,6 +146,13 @@ * has a direct correspondence, and needs to appear sufficiently aligned * in the virtual address space. 
*/ +/* IAMROOT20 20240330 + * ex) 4k page 기준 + * ARM64_MEMSTART_SHIFT = 30 + * SECTION_SIZE_BITS = 27 + * ARM64_MEMSTART_SHIFT > SECTION_SIZE_BITS 이므로 + * ARM64_MEMSTART_ALIGN = 0x0000_0000_4000_0000(1GB) + */ #if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS #define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) #else diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 56911691bef05..14274db43e174 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -96,12 +96,29 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) */ static inline void cpu_uninstall_idmap(void) { + /* IAMROOT20 20240330 + * current는 init_task (head.S에서 sp_el0에 init_task 주소를 적어놨음) + * current->active_mm은 init_mm을 가리킴 (현재 init_mm.pgd는 init_pg_dir이 들어있음) + */ struct mm_struct *mm = current->active_mm; + /* IAMROOT20 20240330 + * reserved_pg_dir의 물리 주소를 ttbr0_el1에 write + * reserved_pg_dir은 아무 정보가 없는 zero page table + */ cpu_set_reserved_ttbr0(); + /* IAMROOT20 20240330 + * tlb invalidation + */ local_flush_tlb_all(); + /* IAMROOT20 20240330 + * tcr_el1.t0sz의 값을 현재 사용중인 va bit로 설정해줌 + */ cpu_set_default_tcr_t0sz(); + /* IAMROOT20 20240330 + * mm과 init_mm이 같으므로 현재 호출되지 않음 + */ if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 60a37bf245a9f..7cfa4b68673c7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -356,9 +356,16 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. 
*/ + /* IAMROOT20_START 20240330 */ cpu_uninstall_idmap(); + /* IAMROOT20 20240330 + * xen_early_init은 분석하지 않음 + */ xen_early_init(); + /* IAMROOT20 20240330 + * efi_init은 분석하지 않음 + */ efi_init(); if (!efi_enabled(EFI_BOOT)) { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66e70ca476805..23f48e69e4ca7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -260,6 +260,11 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { + /* IAMROOT20 20240330 + * ex) PAGE_END = 0xffff_8000_0000_0000 + * _PAGE_OFFSET(vabits_actual) = 0xffff_0000_0000_0000 + * 0xffff_0000_0000_0000 ~ 0xffff_8000_0000_0000은 linear + */ s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); /* @@ -277,11 +282,19 @@ void __init arm64_memblock_init(void) } /* Remove memory above our supported physical address size */ + /* IAMROOT20 20240330 + * ex) pa 48bit 사용 시 + * 0x0001_0000_0000_0000 ~ 0xffff_ffff_ffff_ffff의 범위의 물리 메모리는 사용되지 않음으로 memblock.memory에서 제거 + */ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* * Select a suitable value for the base of physical memory. */ + /* IAMROOT20 20240330 + * memblock.memory.regions[0]의 base를 ARM64_MEMSTART_ALIGN(ex 1GB)에 맞춰서 내림하여 memstart_addr을 설정 + * ex) memstart_addr = 0x0000_0000_4000_0000 + */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); @@ -293,12 +306,26 @@ void __init arm64_memblock_init(void) * linear mapping. Take care not to clip the kernel which may be * high in memory. 
*/ + /* IAMROOT20 20240330 + * ex) memstart_addr = 0x0000_0000_4000_0000 + * linear_region_size = 0x0000_8000_0000_0000 + * memstart_addr + linear_region_size과 __pa_symbol(_end) 중 더 큰 값부터 0xffff_ffff_ffff_ffff까지 범위를 memblock.memory에서 제거 + */ memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa_symbol(_end)), ULLONG_MAX); + /* IAMROOT20 20240330 + * memstart_addr부터 memblock_end_of_DRAM()까지의 크기가 linear_region_size보다 큰 경우 + */ if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) { /* ensure that memstart_addr remains sufficiently aligned */ + /* IAMROOT20 20240330 + * memblock_end_of_DRAM()에서 아래쪽으로 linear_region_size만큼으로 memstart_addr을 재설정 + */ memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size, ARM64_MEMSTART_ALIGN); + /* IAMROOT20 20240330 + * 0부터 새로운 memstart_addr까지 memblock.memory에서 제거 + */ memblock_remove(0, memstart_addr); } @@ -309,6 +336,10 @@ void __init arm64_memblock_init(void) * we have to move it upward. Since memstart_addr represents the * physical address of PAGE_OFFSET, we have to *subtract* from it. */ + /* IAMROOT20 20240330 + * CONFIG_VA_52bit이고 vabits_actual이 52bit가 아닐 경우 + * 52bit 기준으로 설정된 가상 주소를 48bit 기준으로 변경 + */ if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52); @@ -317,17 +348,27 @@ void __init arm64_memblock_init(void) * high up in memory, add back the kernel region that must be accessible * via the linear mapping. 
*/ + /* IAMROOT20 20240330 + * arm64의 경우 memory_limit는 PHYS_ADDR_MAX로 선언할때 초기화되어 있음 + * 만약 boot parameter의 mem으로 값이 설정되어 들어왔다면 memory_limit는 해당 값으로 변경되어 있었을 것 + */ if (memory_limit != PHYS_ADDR_MAX) { memblock_mem_limit_remove_map(memory_limit); memblock_add(__pa_symbol(_text), (u64)(_end - _text)); } + /* IAMROOT20 20240330 + * fdt에서 initrd의 메모리 영역을 설정했다면 + */ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) { /* * Add back the memory we just removed if it results in the * initrd to become inaccessible via the linear mapping. * Otherwise, this is a no-op */ + /* IAMROOT20 20240330 + * base와 size를 설정 + */ u64 base = phys_initrd_start & PAGE_MASK; u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; @@ -339,23 +380,49 @@ void __init arm64_memblock_init(void) * each other) so that all granule/#levels combinations can * always access both. */ + /* IAMROOT20 20240330 + * base가 memblock_start_of_DRAM()보다 작거나 + * (base + size)가 (memblock_start_of_DRAM() + linear_region_size)보다 크면 + * initrd의 메모리 영역이 linear mapping될 수 없으므로 warning + */ if (WARN(base < memblock_start_of_DRAM() || base + size > memblock_start_of_DRAM() + linear_region_size, "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) { phys_initrd_size = 0; } else { + /* IAMROOT20 20240330 + * initrd의 메모리 영역을 memblock.memory에 등록 + */ memblock_add(base, size); + /* IAMROOT20 20240330 + * memblock.memory에서 해당 region에서 MEMBLOCK_NOMAP flag를 clear + */ memblock_clear_nomap(base, size); + /* IAMROOT20 20240330 + * initrd의 메모리 영역을 memblock.reserved에 등록 + */ memblock_reserve(base, size); } } if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; + /* IAMROOT20 20240330 + * id_aa64mmfr0_el1 레지스터 값을 읽음 + */ u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + /* IAMROOT20 20240330 + * id_aa64mmfr0_el1.parange 필드 값을 추출 + * ex) pa 48bit, parange = 0b0101 + */ int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); + 
/* IAMROOT20 20240330 + * ex) pa 48bit + * range = 0x0000_8000_0000_0000 - 0x0001_0000_0000_0000 + * range = -0x0000_8000_0000_0000 + */ s64 range = linear_region_size - BIT(id_aa64mmfr0_parange_to_phys_shift(parange)); @@ -364,6 +431,9 @@ void __init arm64_memblock_init(void) * margin, the size of the region that the physical memory can * span, randomize the linear region as well. */ + /* IAMROOT20 20240330 + * ex) pa 48bit일 경우 range가 음수이므로 해당 if문에 진입하지 못함 + */ if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) { range /= ARM64_MEMSTART_ALIGN; memstart_addr -= ARM64_MEMSTART_ALIGN * @@ -375,7 +445,13 @@ void __init arm64_memblock_init(void) * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. */ + /* IAMROOT20 20240330 + * kernel image 물리 주소 region을 memblock.reserved에 저장 + */ memblock_reserve(__pa_symbol(_stext), _end - _stext); + /* IAMROOT20 20240330 + * initrd_start, initrd_end에 가상주소를 저장 + */ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) { /* the generic initrd code expects virtual addresses */ initrd_start = __phys_to_virt(phys_initrd_start); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 3ca934603a53a..c061586552023 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -80,6 +80,10 @@ void __init of_fdt_limit_memory(int limit) } } +/* IAMROOT20 20240330 + * status가 없거나 + * status="ok" 또는 status="okay"이면 true + */ static bool of_fdt_device_is_available(const void *blob, unsigned long node) { const char *status = fdt_getprop(blob, node, "status", NULL); @@ -479,6 +483,9 @@ static u32 of_fdt_crc32; static int __init early_init_dt_reserve_memory(phys_addr_t base, phys_addr_t size, bool nomap) { + /* IAMROOT20 20240330 + * nomap일 경우 + */ if (nomap) { /* * If the memory is already reserved (by another region), we @@ -489,8 +496,14 @@ static int __init early_init_dt_reserve_memory(phys_addr_t base, memblock_is_region_reserved(base, size)) return -EBUSY; + /* IAMROOT20 20240330 + * NOMAP flag 
설정하고 reserve 등록안하고 종료 + */ return memblock_mark_nomap(base, size); } + /* IAMROOT20 20240330 + * nomap이 아닐 경우 memblock.reserved에 등록 + */ return memblock_reserve(base, size); } @@ -507,6 +520,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, int first = 1; bool nomap; + /* IAMROOT20 20240330 + * reg 프로퍼티 가져옴 + */ prop = of_get_flat_dt_prop(node, "reg", &len); if (!prop) return -ENOENT; @@ -517,12 +533,22 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, return -EINVAL; } + /* IAMROOT20 20240330 + * no-map 프로퍼티 가져옴 + */ nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL; while (len >= t_len) { + /* IAMROOT20 20240330 + * reg 프로퍼티에서 base와 size값을 읽음 + */ base = dt_mem_next_cell(dt_root_addr_cells, &prop); size = dt_mem_next_cell(dt_root_size_cells, &prop); + /* IAMROOT20 20240330 + * nomap일 경우 nomap flag만 설정 + * nomap이 아닐 경우 memblock.reserved에 등록 + */ if (size && early_init_dt_reserve_memory(base, size, nomap) == 0) pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n", @@ -533,6 +559,10 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, len -= t_len; if (first) { + /* IAMROOT20 20240330 + * reserved_mem에 reg의 첫번째 region 정보만 저장 + */ + /* IAMROOT20_END 20240330 */ fdt_reserved_mem_save_node(node, uname, base, size); first = 0; } @@ -545,6 +575,11 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, * in /reserved-memory matches the values supported by the current implementation, * also check if ranges property has been provided */ +/* IAMROOT20 20240330 + * reserved-memory노드에 size-cells, address-cells, ranges 속성이 있는지 확인 + * 현재 node의 size-cells 값과 dt_root_size_cells 값이 같은지 확인 + * 현재 node의 address-cells 값과 dt_root_addr_cells 값이 같은지 확인 + */ static int __init __reserved_mem_check_root(unsigned long node) { const __be32 *prop; @@ -571,6 +606,9 @@ static int __init fdt_scan_reserved_mem(void) int node, child; const void *fdt = initial_boot_params; + /* IAMROOT20 
20240330 + * reserved-memory 노드를 가리킴 + */ node = fdt_path_offset(fdt, "/reserved-memory"); if (node < 0) return -ENODEV; @@ -580,13 +618,22 @@ static int __init fdt_scan_reserved_mem(void) return -EINVAL; } + /* IAMROOT20 20240330 + * reserved-memory노드의 sub-node들을 순회 + */ fdt_for_each_subnode(child, fdt, node) { const char *uname; int err; if (!of_fdt_device_is_available(fdt, child)) continue; - + /* IAMROOT20 20240330 + * ex) hyp_mem: hyp@80000000 { + * reg = <0x0 0x80000000 0x0 0x600000>; + * no-map; + * }; + * uname = "hyp_mem" + */ uname = fdt_get_name(fdt, child, NULL); err = __reserved_mem_reserve_reg(child, uname); From 4b884f3ea4101478489d34d96ab5e923ee0aa2ba Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 6 Apr 2024 18:06:28 +0900 Subject: [PATCH 049/104] IAMROOT20 20240406 - Add comments Signed-off-by: Daero Lee --- arch/arm64/include/asm/pgtable.h | 7 +++++++ drivers/of/fdt.c | 23 +++++++++++++++++++++++ drivers/of/of_reserved_mem.c | 15 ++++++++++++++- include/linux/pageblock-flags.h | 6 ++++++ scripts/dtc/libfdt/fdt_ro.c | 3 +++ 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 22a5870016d61..9eb15aeaa5d2d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -388,6 +388,13 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. 
*/ +/* IAMROOT20 20240406 + * ex) VA_BITS : 39, page size : 4KB + * - HPAGE_SHIFT PMD_SHIFT(21) + * - HPAGE_SIZE (1 << 21) + * - HPAGE_MASK ~((1 << 21) - 1) + * - HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) = 21 - 12 = 9 + */ #define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index c061586552023..79940a684e531 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -563,6 +563,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node, * reserved_mem에 reg의 첫번째 region 정보만 저장 */ /* IAMROOT20_END 20240330 */ + /* IAMROOT20_START 20240406 + * reserved memory 영역을 reserved_mem[64]에 저장한다. + */ fdt_reserved_mem_save_node(node, uname, base, size); first = 0; } @@ -637,6 +640,22 @@ static int __init fdt_scan_reserved_mem(void) uname = fdt_get_name(fdt, child, NULL); err = __reserved_mem_reserve_reg(child, uname); + /* IAMROOT20 20240406 + * reg property가 없는 경우 -ENOENT를 return + * - size property가 있는 경우에는 reserved_mem[64]에 base와size를 0으로 해서 저장 + * ex) reserved-memory { + * #address-cells = <1>; + * #size-cells = <1>; + * ranges; + * + * linux,cma@80000000 { + * compatible = "shared-dma-pool"; + * alloc-ranges = <0x80000000 0x30000000>; + * size = <0x10000000>; + * linux,cma-default; + * reusable; + * }; + */ if (err == -ENOENT && of_get_flat_dt_prop(child, "size", NULL)) fdt_reserved_mem_save_node(child, uname, 0, 0); } @@ -687,6 +706,10 @@ void __init early_init_fdt_scan_reserved_mem(void) /* Process header /memreserve/ fields */ for (n = 0; ; n++) { + /* IAMROOT20 20240406 + * initial_boot_params(fdt 가상주소)에서 memory reservation block 영역의 + * base, size를 읽어와서 size가 있는 경우 memblock.reserved에 저장 + */ fdt_get_mem_rsv(initial_boot_params, n, &base, &size); if (!size) break; diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 948efa9f99e3b..d6b660343bb4a 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -119,6 
+119,9 @@ static int __init __reserved_mem_alloc_size(unsigned long node, && of_flat_dt_is_compatible(node, "shared-dma-pool") && of_get_flat_dt_prop(node, "reusable", NULL) && !nomap) + /* IAMROOT20 20240406 + * ex) CMA_MIN_ALIGNMENT_BYTES 2M (VA_BITS:39, page size:4KB) + */ align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); @@ -137,6 +140,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, end = start + dt_mem_next_cell(dt_root_size_cells, &prop); + /* IAMROOT20 20240406 + * start ~ end 사이에서 size 만큼reserved memory 영역을 할당한다 + * - base에 할당받은 영역의 시작 주소가 저장된다 + */ ret = early_init_dt_alloc_reserved_memory_arch(size, align, start, end, nomap, &base); if (ret == 0) { @@ -227,6 +234,9 @@ static void __init __rmem_check_for_overlap(void) if (reserved_mem_count < 2) return; + /* IAMROOT20 20240406 + * overlap을 확인하기 전에 reserved_mem의 base, size 순으로 정렬한다 + */ sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]), __rmem_cmp, NULL); for (i = 0; i < reserved_mem_count - 1; i++) { @@ -234,7 +244,10 @@ static void __init __rmem_check_for_overlap(void) this = &reserved_mem[i]; next = &reserved_mem[i + 1]; - + + /* IAMROOT20 20240406 + * this영역과 next 영역이 overlap하는 경우 error log 출력 + */ if (this->base + this->size > next->base) { phys_addr_t this_end, next_end; diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e83c4c0950417..d761762ecd67f 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,6 +41,12 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. 
*/ +/* IAMROOT20 20240406 + * ex) VA_BITS : 39, page size : 4KB + * - HUGETLB_PAGE_ORDER 9 + * - MAX_ORDER 10 + * - pageblock_order (unsigned int)9 + */ #define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c index 9f6c551a22c2f..c1dfaadc94f9a 100644 --- a/scripts/dtc/libfdt/fdt_ro.c +++ b/scripts/dtc/libfdt/fdt_ro.c @@ -177,6 +177,9 @@ int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) const struct fdt_reserve_entry *re; FDT_RO_PROBE(fdt); + /* IAMROOT20 20240406 + * fdt의 memory reservation block에서 n번째 위치의 fdt_reserve_entry 포인터를 가져옴 + */ re = fdt_mem_rsv(fdt, n); if (!can_assume(VALID_INPUT) && !re) return -FDT_ERR_BADOFFSET; From 0f3fa5534131eae3aee221e8e3ae89cb9abcbed8 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 6 Apr 2024 13:13:12 +0000 Subject: [PATCH 050/104] IAMROOT20 20240406 - Add comments on of_reserved_mem.c, memblock.h, memblock.c --- drivers/of/of_reserved_mem.c | 7 +++++++ include/linux/memblock.h | 3 +++ mm/memblock.c | 28 +++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index d6b660343bb4a..ff5d828a21414 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -37,8 +37,15 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, phys_addr_t base; int err = 0; + /* IAMROOT20 20240406 + * MEMBLOCK_ALLOC_ANYWHERE = 0xffff_ffff_ffff_ffff + * SMP_CACHE_BYTES = (1 << 6) = 64 + */ end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; align = !align ? 
SMP_CACHE_BYTES : align; + /* IAMROOT20 20240406 + * base = 할당된 메모리 영역의 시작 주소 + */ base = memblock_phys_alloc_range(size, align, start, end); if (!base) return -ENOMEM; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d34b509cd4057..d371ea97e906d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -200,6 +200,9 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ +/* IAMROOT20 20240406 + * ULLONG_MAX가 반환될 때까지 역순으로 순회하며, 가장 첫 번째 reserved되지 않은 영역으로 설정 + */ #define __for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ diff --git a/mm/memblock.c b/mm/memblock.c index e37f705e2ba8c..67dfd38503ff2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -280,10 +280,16 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, this_start = clamp(this_start, start, end); this_end = clamp(this_end, start, end); + /* IAMROOT20 20240406 + * 만약 할당된 크기가 size보다 작으면 continue + */ if (this_end < size) continue; cand = round_down(this_end - size, align); + /* IAMROOT20 20240406 + * 영역을 할당받았지만, round_down된 주소가 this_start보다 작으면 continue + */ if (cand >= this_start) return cand; } @@ -1130,6 +1136,9 @@ static bool should_skip_region(struct memblock_type *type, if (type != memblock_memory) return false; + /* IAMROOT20 20240406 + * NUMA_NO_NODE이거나, 현재 region의 노드의 nid가, 찾고자 하는 노드의 nid와 다른 경우 skip + */ /* only memory regions are associated with nodes, check it */ if (nid != NUMA_NO_NODE && nid != m_nid) return true; @@ -1309,6 +1318,9 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) nid = NUMA_NO_NODE; + /* IAMROOT20 20240406 + * 만약 idx가 가장 끝 주소인 경우, idx_a를 마지막 리전이 되고, idx_b를 마지막 리전의 + 1이 된다 + */ if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1327,6 +1339,9 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, if (should_skip_region(type_a, m, nid, flags)) continue; + /* IAMROOT20 20240406 + * 만약 type_b가 지정되지 않으면, 첫 번째 loop의 memblock 영역을 반환 + */ if (!type_b) { if (out_start) *out_start = m_start; @@ -1345,6 +1360,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, phys_addr_t r_start; phys_addr_t r_end; + /* IAMROOT20 20240406 + * idx_b의 리전을 바탕으로, 해당 리전의 이전 리전의 끝 주소를 r_start로 설정 + * 현재 리전의 시작 주소를 r_end로 설정 + * (만약 idx_b가 마지막 리전 이후를 가리키면 PHYS_ADDR_MAX로 설정) + */ r = &type_b->regions[idx_b]; r_start = idx_b ? r[-1].base + r[-1].size : 0; r_end = idx_b < type_b->cnt ? @@ -1353,7 +1373,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, * if idx_b advanced past idx_a, * break out to advance idx_a */ - if (r_end <= m_start) break; /* if the two regions intersect, we're done */ @@ -1368,6 +1387,13 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, idx_a--; else idx_b--; + /* IAMROOT20 20240406 + * idx_a = 0, idx_b = 1 + * idx = 0000_0001_0000_0000 + * + * idx_a = -1, idx_b = 0 + * idx = 0000_0000_ffff_ffff + */ *idx = (u32)idx_a | (u64)idx_b << 32; return; } From c28054601f65e45cb768115b5c88ec09067d12b7 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 13 Apr 2024 22:12:28 +0900 Subject: [PATCH 051/104] IAMROOT20 20240413 - Add comments Signed-off-by: Daero Lee --- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/init.c | 5 +++++ drivers/of/of_reserved_mem.c | 19 +++++++++++++++++++ kernel/dma/coherent.c | 12 ++++++++++++ mm/memblock.c | 10 +++++++++- 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7cfa4b68673c7..99dbf2765fcea 100644 --- a/arch/arm64/kernel/setup.c 
+++ b/arch/arm64/kernel/setup.c @@ -376,6 +376,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) } arm64_memblock_init(); + /* IAMROOT20_END 20240413 */ paging_init(); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 23f48e69e4ca7..c184d458419d6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -460,6 +460,11 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + /* IAMROOT20 20240413 + * __va : 물리 주소를 '리니어 커널 메모리 매핑 영역'의 가상 주소로 변환 + * - 리니어 커널 메모리 매핑 영역(4K, VA_BITS=48) + * : PAGE_OFFSET(0xffff_0000_0000_0000) ~ 0xffff_8000_0000_0000 + */ high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index ff5d828a21414..f3fd477a5a3cf 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -131,6 +131,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, */ align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES); + /* IAMROOT20_START 20240413 + * "alloc-ranges" property가 있으면, start ~ end 사이에서 memory 할당 + * 없으면, 전체 영역(0 ~ 0xffff_..._ffff)에서 memory 할당 + */ prop = of_get_flat_dt_prop(node, "alloc-ranges", &len); if (prop) { @@ -143,6 +147,10 @@ static int __init __reserved_mem_alloc_size(unsigned long node, base = 0; while (len > 0) { + /* IAMROOT20 20240413 + * alloc-ranges에 base, size가 여러개가 명시되엉 있는 경우 + * 순서대로 할당을 시도하고, 성공하면 break로 빠져나감 + */ start = dt_mem_next_cell(dt_root_addr_cells, &prop); end = start + dt_mem_next_cell(dt_root_size_cells, &prop); @@ -159,6 +167,12 @@ static int __init __reserved_mem_alloc_size(unsigned long node, (unsigned long)(size / SZ_1M)); break; } + /* IAMROOT20 20240413 + * alloc-ranges에 base, size가 여러개가 명시되어 있는 경우 + * 다음 range에서 할당하기 위해서 len -= t_len을 수행 + * ex) alloc-ranges = <0x80000000 0x30000000 + * 0x90000000 0x30000000>; + */ len -= t_len; } @@ -194,6 +208,11 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem) const struct of_device_id 
*i; int ret = -ENOENT; + /* IAMROOT20 20240413 + * __reservedmem_of_table은 RESERVEDMEM_OF_DECLARE 매크로로 of_device_id 구조체를 등록함 + * ex) RESERVEDMEM_OF_DECLARE(tegra210_emc_table, "nvidia,tegra210-emc-table", + * tegra210_emc_table_init); + */ for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { reservedmem_of_init_fn initfn = i->data; const char *compat = i->compatible; diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index c21abc77c53e9..255aabfa00802 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -358,6 +358,18 @@ static const struct reserved_mem_ops rmem_dma_ops = { .device_release = rmem_dma_device_release, }; +/* IAMROOT20 20240413 + * ex) ipa_fws_region: ipa@f6800000 { + * compatible = "shared-dma-pool"; + * reg = <0x0 0xf6800000 0x0 0x5000>; + * no-map; + * }; + * zap_shader_region: gpu@f6900000 { + * compatible = "shared-dma-pool"; + * reg = <0x0 0xf6900000 0x0 0x2000>; + * no-map; + * }; + */ static int __init rmem_dma_setup(struct reserved_mem *rmem) { unsigned long node = rmem->fdt_node; diff --git a/mm/memblock.c b/mm/memblock.c index 67dfd38503ff2..f214a38127615 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1579,7 +1579,11 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, flags); if (found && !memblock_reserve(found, size)) goto done; - + + /* IAMROOT20 20240413 + * nid가 지정되어있고, exact_nid가 false 인 경우 + * nid를 NUMA_NO_NODE로 변경하여 모든 nid에 대해 memory를 찾는다 + */ if (nid != NUMA_NO_NODE && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, @@ -1588,6 +1592,10 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, goto done; } + /* IAMROOT20 20240413 + * MEMBLOCK_MIRROR region에서 먼저 memory 할당을 시도했지만 실패하는 경우 + * mirror가 아닌 region에서 다시 할당을 시도한다 + */ if (flags & MEMBLOCK_MIRROR) { flags &= ~MEMBLOCK_MIRROR; pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", From 71d273f7b25fb2d7594c33b52c672faebad02e2d Mon Sep 17 00:00:00 2001 From: 
Daero Lee Date: Sat, 20 Apr 2024 23:08:51 +0900 Subject: [PATCH 052/104] IAMROOT20 20240420 - Add comments Signed-off-by: Daero Lee --- arch/arm64/include/asm/pgalloc.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 5 +++++ arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/fixmap.c | 4 ++++ arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++++++++--- mm/vmalloc.c | 3 +++ 6 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 6a7fbcc5de960..b9d255ac9665e 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -22,8 +22,8 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) { - /* IAMROOT20 20231202 - * pudp = (pmdp | prot) + /* IAMROOT20 20231202, 20240420 + * (*pudp) = (pmdp | prot) */ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9eb15aeaa5d2d..c3a75ecce0aa0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -810,6 +810,11 @@ static inline pmd_t *pud_pgtable(pud_t pud) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { + /* IAMROOT20 20240420 + * fixmap_copy() 에서 호출하는 경우, + * swapper_pg_dir을 FIX_PGD에 mapping한 후 이 함수로 들어오기 때문에 + * 아래 in_swapper_pgdir 함수에서 false를 return + */ if (in_swapper_pgdir(p4dp)) { set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); return; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 99dbf2765fcea..55b764d9188e0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -378,6 +378,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) arm64_memblock_init(); /* IAMROOT20_END 20240413 */ + /* IAMROOT20_START 20240420 */ paging_init(); acpi_table_upgrade(); diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index e3b4a755861bc..cef0cdda4116b 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -203,6 
+203,10 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) */ void __init fixmap_copy(pgd_t *pgdir) { + /* IAMROOT20 20240420 + * init_pg_dir에서 FIXADDR_TOT_START 주소가 가리키는 index의 descriptor를 + * swapper_pg_dir에 복사한다 + */ if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) { /* * The fixmap falls in a separate pgd to the kernel, and doesn't diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 43d0a2638b11a..4d6c98f91d422 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -118,6 +118,10 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * slot will be free, so we can (ab)use the FIX_PTE slot to initialise * any level of table. */ + /* IAMROOT20 20240420 + * cpu에서는 물리주소(phys)에 접근할 수 없기 때문에, FIX_PTE에 mapping + * 하여 cpu에서 접근할 수 있도록 설정 + */ ptr = pte_set_fixmap(phys); memset(ptr, 0, PAGE_SIZE); @@ -126,6 +130,9 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * Implicit barriers also ensure the zeroed page is visible to the page * table walker */ + /* IAMROOT20 20240420 + * cpu에서 가상 주소로 page를 clear해 준다음, FIX_PTE mapping을 제거 + */ pte_clear_fixmap(); return phys; @@ -351,15 +358,20 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, if (flags & NO_EXEC_MAPPINGS) p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); + /* IAMROOT20 20240420 + * ex) pgtable_alloc = early_pgtable_alloc의 경우 + * - memblock에서 1 PAGE를 할당하고, 물리 주소를 return + */ pud_phys = pgtable_alloc(PUD_SHIFT); __p4d_populate(p4dp, pud_phys, p4dval); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); - /* IAMROOT20 20231209 - * FIX_PUD를 bm_pud에 매핑한다 - * pudp = virt(FIX_PUD) + (phys_addr & (PAGE_SIZE - 1)) + /* IAMROOT20 20231209, 20240420 + * - pudp(물리주소) = (*p4dp) + pud_index(addr)를 FIX_PUD에 매핑한다 + * - pudp(가상주소) = virt(FIX_PUD) + (pudp 물리주소 & (PAGE_SIZE - 1))를 + * return */ pudp = pud_set_fixmap_offset(p4dp, addr); do { @@ -757,6 +769,9 @@ static void __init map_kernel(pgd_t *pgdp) * mapping to install SW 
breakpoints. Allow this (only) when * explicitly requested with rodata=off. */ + /* IAMROOT20 20240420 + * SW breakpoint -> "b ." + */ pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; /* @@ -782,6 +797,7 @@ static void __init map_kernel(pgd_t *pgdp) map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); fixmap_copy(pgdp); + /* IAMROOT20_END 20240420 */ kasan_copy_shadow(pgdp); } @@ -820,6 +836,13 @@ void __init paging_init(void) pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); extern pgd_t init_idmap_pg_dir[]; + /* IAMROOT20 20240420 + * idmap_t0sz = ID map 영역이 mapping 할 수 있는 크기 + * - _end가 2^VA_BIT_MIN보다 큰 주소 위치에 있을 경우를 대비하여 설정 + * + * ex) __fls(0b1110) = 3 + * cf. __ffs(0b1110) = 1 + */ idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); map_kernel(pgdp); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1d13d71687d73..23be64097136c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2410,6 +2410,9 @@ void __init vm_area_add_early(struct vm_struct *vm) struct vm_struct *tmp, **p; BUG_ON(vmap_initialized); + /* IAMROOT20 20240420 + * virt addr이 작은 것부터 큰것 순서대로 삽입 + */ for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { if (tmp->addr >= vm->addr) { BUG_ON(tmp->addr < vm->addr + vm->size); From 7e3634d9d5d58c6fd7e4c0fe65e74a5d8ab202ae Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 27 Apr 2024 19:26:17 +0900 Subject: [PATCH 053/104] IAMROOT20 20240427 - Add comments Signed-off-by: Daero Lee --- arch/arm64/include/asm/mmu_context.h | 3 +++ arch/arm64/mm/mmu.c | 18 ++++++++++++++++++ include/linux/mm.h | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 14274db43e174..0237346dac83f 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -173,6 +173,9 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) unsigned long daif; /* phys_to_ttbr() zeros lower 2 
bits of ttbr with 52-bit PA */ + /* IAMROOT20 20240427 + * swapper_pg_dir의 phys address를 구해서 ttbr1에 저장 + */ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp)); if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4d6c98f91d422..83d1b7817babe 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -610,6 +610,11 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { static void __init map_mem(pgd_t *pgdp) { + /* IAMROOT20 20240427 + * ex) VA_BITS_MIN = 48인 경우 + * _PAGE_END(47) = 0xFFFF_8000_0000_0000 + * : 리니어 커널 메모리 매핑 영역의 끝 주소를 의미 + */ static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); @@ -638,9 +643,17 @@ static void __init map_mem(pgd_t *pgdp) * So temporarily mark them as NOMAP to skip mappings in * the following for-loop */ + /* IAMROOT20 20240427 + * read-only text와 rodata 섹션을 nomap으로 설정하여 + * 아래 for-loop에서 매핑하는 것을 방지 + */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); /* map all the memory banks */ + /* IAMROOT20 20240427 + * memblock.memory의 모든 region을 순회하며 + * 리니어 매핑 영역(0xFFFF_0000_0000_0000 ~ 0xFFFF_8000_0000_0000)에 매핑한다 + */ for_each_mem_range(i, &start, &end) { if (start >= end) break; @@ -663,6 +676,10 @@ static void __init map_mem(pgd_t *pgdp) * Note that contiguous mappings cannot be remapped in this way, * so we should avoid them here. 
*/ + /* IAMROOT20 20240427 + * 커널 영역을 커널 페이지 속성으로 리니어 영역에 매핑 + * - contiguous 매핑 허용하지 않음 + */ __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); @@ -798,6 +815,7 @@ static void __init map_kernel(pgd_t *pgdp) fixmap_copy(pgdp); /* IAMROOT20_END 20240420 */ + /* IAMROOT20_START 20240427 */ kasan_copy_shadow(pgdp); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 9e10485f37e7f..689076cb9b3c5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -106,6 +106,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif +/* IAMROOT20 20240427 + * lm_alias : vmalloc 영역의 커널 이미지 가상 주소(x)에서 + * 리니어 매핑 영역에 매핑되어 있는 가상 주소 반환 + */ #ifndef lm_alias #define lm_alias(x) __va(__pa_symbol(x)) #endif From 873f6ca06f367d2f007a1de396cbbb66f5bdbb33 Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 27 Apr 2024 13:16:17 +0000 Subject: [PATCH 054/104] IAMROOT20 20240427 - Add comments --- arch/arm64/include/asm/alternative-macros.h | 31 +++++++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 8 ++++++ arch/arm64/include/asm/daifflags.h | 8 ++++++ arch/arm64/include/asm/mmu_context.h | 9 ++++++ arch/arm64/mm/context.c | 8 ++++++ 5 files changed, 64 insertions(+) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index bdf1f6bcd0103..7e3a2389950a8 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -226,9 +226,40 @@ alternative_endif static __always_inline bool alternative_has_feature_likely(const unsigned long feature) { + /* IAMROOT20 20240427 + * ARM64_NCAPS = 88 + */ compiletime_assert(feature < ARM64_NCAPS, "feature must be < ARM64_NCAPS"); + /* IAMROOT20 20240427 + * ALTERNATIVE_CB(oldinstr, feature, cb) + * __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) + * | (ARM64_ALWAYS_SYSTEM), 1, alt_cb_patch_nops) + * + * 
+ * if cfg_enabled == 1 + * 661: + * oldinstr( "b %l[l_no]" ) + * 662: + * .pushsection .altinstructions,"a" + * + * // ALTINSTR_ENTRY_CB(ARM64_ALWAYS_SYSTEM, alt_cb_patch_nops) + * .word 661b - . label + * .word __stringify(alt_cb_patch_nops) - . callback + * .hword __stringify(ARM64_ALWAYS_SYSTEM) feature bit + * .byte 662b-661b source len + * .byte 664f-663f replacement len + * + * .popsection + * 663: + * 664: + * endif + * return true; + * + * l_no: + * return false + */ asm_volatile_goto( ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) : diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6bf013fb110d7..bb6ac2edb93c7 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -811,6 +811,14 @@ static inline bool system_has_full_ptr_auth(void) static __always_inline bool system_uses_irq_prio_masking(void) { + /* IAMROOT20 20240427 + * PSEUDO_NMI : daif를 set해도 마스킹이 되지 않는 인터럽트(NMI)를 + * 활성화하기 위해 GIC를 사용하여 인터럽트를 발생시킨다. + * GIC : Generic Interrupt Controller + * - 인터럽트를 받아서 분배하는 하드웨어 모듈 + * + * ARM64_HAS_GIC_PRIO_MASKING = 27 + */ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_GIC_PRIO_MASKING); } diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2fe..5f03d0922b2bd 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -42,9 +42,17 @@ static inline unsigned long local_daif_save_flags(void) { unsigned long flags; + /* IAMROOT20 20240427 + * msr daif, flags + */ flags = read_sysreg(daif); if (system_uses_irq_prio_masking()) { + /* IAMROOT20 20240427 + * ICC_PMR_EL1 : 특정 인터럽트 필터보다 더 높은 수준의 + * 인터럽트가 발생했을 때만 core에게 신호를 보낸다. 
+ */ + /* IAMROOT20_END 20240427 */ /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT | PSR_F_BIT; diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0237346dac83f..aaab0103c097e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -53,6 +53,11 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); cpu_set_reserved_ttbr0(); + /* IAMROOT20 20240427 + * cpu_do_switch_mm() + * - ttbr0를 pgd로 설정한다. + * - pgd, ttbr1의 ASID 필드에 mm의 asid를 설정한다. + */ cpu_do_switch_mm(virt_to_phys(pgd),mm); } @@ -192,6 +197,10 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); + /* IAMROOT20 20240427 + * idmap을 ttbr0으로 설정하고, + * ttbr0, ttbr1의 ASID를 init_mm의 asid로 설정한다. + */ __cpu_install_idmap(idmap); /* diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e1e0dca018392..8ae303cac226e 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -356,6 +356,14 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) if (system_supports_cnp() && asid) ttbr0 |= TTBR_CNP_BIT; + /* IAMROOT20 20240427 + * TTBR_ASID_MASK = 0xffff << 48 + * __bf_shf(TTBR_ASID_MASK) = 48 + * + * FIELD_PREP(TTBR_ASID_MASK, asid) = (asid << 48 ) & (0xffff << 48) + * + * TTBR의 ASID 필드의 mask와 asid를 AND 연산한 값을 가져온다. 
+ */ /* SW PAN needs a copy of the ASID in TTBR0 for entry */ if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN)) ttbr0 |= FIELD_PREP(TTBR_ASID_MASK, asid); From 8186235216e4ba3f4fdd06f4e744d1c74b81b322 Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 28 Apr 2024 12:15:31 +0900 Subject: [PATCH 055/104] =?UTF-8?q?IAMROOT20=20memblock.c=20=EC=A3=BC?= =?UTF-8?q?=EC=84=9D=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mm/memblock.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index f214a38127615..5a2b5704a0068 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1194,9 +1194,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid) { - /* IAMROOT20 20240309 - * idx 값을 절반으로 나누어 lsb: idx_a의 카운터,, msb: idx_b의 카운터 - */ + /* IAMROOT20 20240309 + * idx 값을 절반으로 나누어 lsb: idx_a의 카운터,, msb: idx_b의 카운터 + */ int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; @@ -1214,9 +1214,10 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, if (should_skip_region(type_a, m, nid, flags)) continue; - /* IAMROOT20 20240309 - * type_b에 대한 영역이 지정되지 않으면(null) 현재 1차 루프 인덱스의 memblock에 대한 영역을 반환 - */ + /* IAMROOT20 20240309 + * type_b에 대한 영역이 지정되지 않으면(null) 현재 1차 루프 + * 인덱스의 memblock에 대한 영역을 반환 + */ if (!type_b) { if (out_start) *out_start = m_start; @@ -1237,8 +1238,9 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, r = &type_b->regions[idx_b]; /* IAMROOT20 20240309 - * idx_b가 0보다 크면 현재 이전 memblock의 끝 주소를 가리키고 idx_b가 0이면 0번 주소를 지정 - */ + * idx_b가 0보다 크면 현재 이전 memblock의 끝 주소를 + * 가리키고 idx_b가 0이면 0번 주소를 지정 + */ r_start = idx_b ? r[-1].base + r[-1].size : 0; r_end = idx_b < type_b->cnt ? 
r->base : PHYS_ADDR_MAX; @@ -1248,16 +1250,17 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * break out to advance idx_a */ /* IAMROOT20 20240309 - * reserve memblock 영역이 memory memblock 영역을 벗어난 경우 2차 루프를 빠져나가서 다음 memory memblock을 준비 - */ + * reserve memblock 영역이 memory memblock 영역을 벗어난 + * 경우 2차 루프를 빠져나가서 다음 memory memblock을 준비 + */ if (r_start >= m_end) break; /* if the two regions intersect, we're done */ /* IAMROOT20 20240309 - * 두 영역이 교차하는 경우 + * 두 영역이 교차하는 경우 * out_start에 하단 reserve 영역값의 끝 주소나 memory 영역값의 시작 주소중 가장 큰 주소 * out_end에 상단 reserve 영역값의 시작 주소나 memory 영역값의 끝 주소중에 가장 작은 주소 - */ + */ if (m_start < r_end) { if (out_start) *out_start = @@ -1271,9 +1274,10 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, * advanced for the next iteration. */ /* IAMROOT20 20240309 - * reserve 영역의 끝 주소가 memory 영역의 끝주소와 비교하여 큰 경우 idx_a를 증가, 다음 memory 영역을 준비 - * 크지 않은 경우 idx_b를 증가, 다음 reserve 영역을 준비 - */ + * reserve 영역의 끝 주소가 memory 영역의 끝주소와 + * 비교하여 큰 경우 idx_a를 증가, 다음 memory 영역을 준비 + * 크지 않은 경우 idx_b를 증가, 다음 reserve 영역을 준비 + */ if (m_end <= r_end) idx_a++; else From d76ab4bcf97bc116eade289347807a2f3403be2b Mon Sep 17 00:00:00 2001 From: fehead Date: Wed, 1 May 2024 16:14:36 +0900 Subject: [PATCH 056/104] =?UTF-8?q?IAMROOT20=20map=5Fmem=20=EC=A3=BC?= =?UTF-8?q?=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arch/arm64/include/asm/memory.h | 6 ++++++ arch/arm64/mm/mmu.c | 9 +++++++++ include/linux/bits.h | 5 +++++ include/linux/cma.h | 4 ++++ include/linux/mm.h | 5 +++++ include/linux/pageblock-flags.h | 3 +++ 6 files changed, 32 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 54c8b909bf9c9..e22e02a7b1bef 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -80,6 +80,10 @@ #define VA_BITS_MIN (VA_BITS) #endif +/* IAMROOT20 20240426 + * exam) va : 48 + * _PAGE_END(48) 
0xffff800000000000 (-0x800000000000) + */ #define _PAGE_END(va) (-(UL(1) << ((va) - 1))) #define KERNEL_START _text @@ -328,6 +332,8 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* * IAMROOT20 20231202: * __is_lm_address(addr) => PAGE_OFFSET <= addr < PAGE_END + * exam) VA_BITS : 48 + * 0xffff_0000_0000_0000 ~ 0xffff_8000_0000_0000 */ #define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4d6c98f91d422..26d80f8c15212 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -610,6 +610,9 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { static void __init map_mem(pgd_t *pgdp) { + /* IAMROOT20 20240427 + * _PAGE_END(48) 0xffff800000000000 (-0x800000000000) + */ static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); @@ -649,6 +652,12 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ + /* IAMROOT20 20240427 + * MTE : ARMv8.5 에서 추가된 보안기법으로, 메모리 할당/해제 연산마다 + * 사용되는 포인터와 (2) 접근하는 메모리 간에 상호 태그를 하고, + * 태그 정보 비교를 통해 안전한 접근인지 체크하는 일종의 Sanitizer 기법이다. 
+ * https://velog.io/@pensieveview/MTE-Memory-Tagging-Extension-%EB%A9%94%EB%AA%A8%EB%A6%AC%ED%83%9C%EA%B9%85 + */ __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), flags); } diff --git a/include/linux/bits.h b/include/linux/bits.h index 23b766f1ac117..c7f17aa27b9c9 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -33,6 +33,11 @@ #define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +/* IAMROOT20 20240427 + * ex) GENMASK(16, 4) 0x0000_0000_0001_fff0 + * GENMASK(15, 0) 0x0000_0000_0000_ffff + * GENMASK(47, 0) 0x0000_ffff_ffff_ffff + */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) diff --git a/include/linux/cma.h b/include/linux/cma.h index 63873b93deaa6..32d3ae3250137 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,6 +21,10 @@ * -- can deal with only some pageblocks of a higher-order page being * MIGRATE_CMA, we can use pageblock_nr_pages. */ +/* IAMROOT20 20240416 + * CMA_MIN_ALIGNMENT_PAGES 512 + * CMA_MIN_ALIGNMENT_BYTES SZ_2M + */ #define CMA_MIN_ALIGNMENT_PAGES pageblock_nr_pages #define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9e10485f37e7f..b3792bcd90dea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -107,6 +107,11 @@ extern int mmap_rnd_compat_bits __read_mostly; #endif #ifndef lm_alias +/* IAMROOT20 20240427 + * Linear Mapping alias + * PAGE_OFFSET | (x - kimage_voffset - PHYS_OFFSET) + * -> 0xffff_0000_~ | (x - kimage_voffset - memstart_addr) + */ #define lm_alias(x) __va(__pa_symbol(x)) #endif diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index d761762ecd67f..8aebcd53ea71d 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -58,6 +58,9 @@ extern unsigned int pageblock_order; #endif /* CONFIG_HUGETLB_PAGE */ +/* IAMROOT20 20240416 + * pageblock_nr_pages 512 + */ #define 
pageblock_nr_pages (1UL << pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) #define pageblock_aligned(pfn) IS_ALIGNED((pfn), pageblock_nr_pages) From aac4e25b780ad655c6bebc97c66afea45c19cf1b Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 11 May 2024 22:03:10 +0900 Subject: [PATCH 057/104] IAMROOT20 20240511 - Add comments Signed-off-by: Daero Lee --- arch/arm64/include/asm/daifflags.h | 9 ++++++- arch/arm64/include/asm/mmu_context.h | 5 ++++ arch/arm64/kernel/setup.c | 3 +++ arch/arm64/mm/mmu.c | 21 +++++++++++++++++ arch/arm64/mm/proc.S | 7 ++++++ drivers/of/fdt.c | 7 ++++++ scripts/dtc/libfdt/fdt.c | 35 ++++++++++++++++++++++++++++ 7 files changed, 86 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 5f03d0922b2bd..b4a888b170e88 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -25,6 +25,9 @@ static inline void local_daif_mask(void) (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET))); + /* IAMROOT20 20240511 + * "msr daifset, #0xf" : daif를 모두 masking 해서 인터럽트가 발생하지 않음 + */ asm volatile( "msr daifset, #0xf // local_daif_mask\n" : @@ -43,7 +46,7 @@ static inline unsigned long local_daif_save_flags(void) unsigned long flags; /* IAMROOT20 20240427 - * msr daif, flags + * mrs flags, daif */ flags = read_sysreg(daif); @@ -67,6 +70,7 @@ static inline unsigned long local_daif_save(void) flags = local_daif_save_flags(); + /* IAMROOT20_START 20240511 */ local_daif_mask(); return flags; @@ -122,6 +126,9 @@ static inline void local_daif_restore(unsigned long flags) gic_write_pmr(pmr); } + /* IAMROOT20 20240511 + * msr daif, flags + */ write_sysreg(flags, daif); if (irq_disabled) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index aaab0103c097e..531df05da191a 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -208,6 
+208,11 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) * in the process of being replaced so mask everything. */ daif = local_daif_save(); + /* IAMROOT20 20240511 + * msr ttbr1_el1, ttbr1 + * + * replace_phys는 idmap에 매핑된 주소이다.(ttbr0 사용) + */ replace_phys(ttbr1); local_daif_restore(daif); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 55b764d9188e0..3129a441d0121 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -381,6 +381,9 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) /* IAMROOT20_START 20240420 */ paging_init(); + /* IAMROOT20 20240511 + * acpi는 분석하지 않음 + */ acpi_table_upgrade(); /* Parse the ACPI tables for possible boot-time configuration */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 26231030f4561..79f817934b7bb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -833,12 +833,25 @@ static void __init create_idmap(void) u64 pgd_phys; /* check if we need an additional level of translation */ + /* IAMROOT20 20240511 + * 물리 주소의 idmap이 동일한 주소의 유저 가상 주소 공간에 배치가 불가능한 경우 + * -> 테이블 단계를 증가시켜 유저 가상 주소 공간을 키워 매핑하게 함 + * + * ex) VA_BITS = 42, VA_BITS_MIN = 42 + * idmap_t0sz = 16 + * if( (42 < 48) && (16 < 22) ) -> 페이지 테이블 단계를 증가시킴 + */ if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { pgd_phys = early_pgtable_alloc(PAGE_SHIFT); set_pgd(&idmap_pg_dir[start >> VA_BITS], __pgd(pgd_phys | P4D_TYPE_TABLE)); pgd = __va(pgd_phys); } + /* IAMROOT20 20240511 + * __idmap_text_start ~ __idmap_text_end 까지를 idmap_pg_dir에 매핑한다 + * idmap_pg_dir : 가상 주소와 물리 주소가 1:1로 매핑되어 사용될 때 + * 필요한 테이블로 영구적으로 사용 + */ __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, early_pgtable_alloc, 0); @@ -877,9 +890,17 @@ void __init paging_init(void) cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); init_mm.pgd = swapper_pg_dir; + /* IAMROOT20 20240511 + * 부팅 초기에 사용했던 init_pg_dir이 swapper_pg_dir로 대체되었고, + * init_pg_dir을 memblock.reserved에서 지운다 + */ 
memblock_phys_free(__pa_symbol(init_pg_dir), __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + /* IAMROOT20 20240511 + * 이 함수 호출 이후에 memblock_double_array에서 memblock array가 부족할 때 + * 크기를 2배씩 증가시킬 수 있다 + */ memblock_allow_resize(); create_idmap(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5c87ae847cf86..40f51e310e0d0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -171,6 +171,10 @@ SYM_FUNC_END(cpu_do_resume) .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, reserved_pg_dir + /* IAMROOT20 20240511 + * PA_BITS가 52가 아닌 경우에는 + * -> phys_to_ttbr : mov tmp2, tmp1 + */ phys_to_ttbr \tmp2, \tmp1 offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 @@ -187,6 +191,9 @@ SYM_FUNC_END(cpu_do_resume) * called by anything else. It can only be executed from a TTBR0 mapping. */ SYM_TYPED_FUNC_START(idmap_cpu_replace_ttbr1) + /* IAMROOT20 20240511 + * msr ttbr1_el1, reserved_pg_dir + */ __idmap_cpu_set_reserved_ttbr1 x1, x3 offset_ttbr1 x0, x3 diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 79940a684e531..372cf20bbffa2 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -295,9 +295,15 @@ static int unflatten_dt_nodes(const void *blob, #define FDT_MAX_DEPTH 64 struct device_node *nps[FDT_MAX_DEPTH]; void *base = mem; + /* IAMROOT20 20240511 + * first pass - 크기 계산 -> dryrun = true + */ bool dryrun = !base; int ret; + /* IAMROOT20 20240511 + * first pass - nodepp = null; + */ if (nodepp) *nodepp = NULL; @@ -323,6 +329,7 @@ static int unflatten_dt_nodes(const void *blob, if (!IS_ENABLED(CONFIG_OF_KOBJ) && !of_fdt_device_is_available(blob, offset)) continue; + /* IAMROOT20_END 20240511 */ ret = populate_node(blob, offset, &mem, nps[depth], &nps[depth+1], dryrun); diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c index 1f2f0671f8a7c..fd4f671390974 100644 --- a/scripts/dtc/libfdt/fdt.c +++ b/scripts/dtc/libfdt/fdt.c @@ -143,6 +143,10 @@ int fdt_check_header(const void *fdt) const void *fdt_offset_ptr(const void 
*fdt, int offset, unsigned int len) { + /* IAMROOT20 20240511 + * uoffset : structure block 내에서의 offset + * absoffset : fdt 시작 주소로부터 offset + */ unsigned int uoffset = offset; unsigned int absoffset = offset + fdt_off_dt_struct(fdt); @@ -160,9 +164,16 @@ const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) || ((offset + len) > fdt_size_dt_struct(fdt))) return NULL; + /* IAMROOT20 20240511 + * fdt + structure block offset + structure block 내에서의 offset + */ return fdt_offset_ptr_(fdt, offset); } +/* IAMROOT20 20240511 + * startoffset이 가리키는 tag를 반환 + * nextoffset : 다음 tag의 offset을 저장 + */ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) { const fdt32_t *tagp, *lenp; @@ -171,6 +182,10 @@ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) const char *p; *nextoffset = -FDT_ERR_TRUNCATED; + /* IAMROOT20 20240511 + * 현재 offset이 가리키는 위치의 주소를 반환 + * - fdt + 'structure block offset' + 'structure block 내에서의 offset' + */ tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); if (!can_assume(VALID_DTB) && !tagp) return FDT_END; /* premature end */ @@ -227,6 +242,10 @@ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) int fdt_check_node_offset_(const void *fdt, int offset) { if (!can_assume(VALID_INPUT) + /* IAMROOT20 20240511 + * FDT_TAGSIZE = sizeof(fdt32_t) = 4 + * - offset % FDT_TAGSIZE : 4바이트 단위로 정렬이 되어 있지 않는 경우 + */ && ((offset < 0) || (offset % FDT_TAGSIZE))) return -FDT_ERR_BADOFFSET; @@ -248,6 +267,9 @@ int fdt_check_prop_offset_(const void *fdt, int offset) return offset; } +/* IAMROOT20 20240511 + * offset(현재 node)이 가리키는 위치의 다음 node의 offset를 반환 + */ int fdt_next_node(const void *fdt, int offset, int *depth) { int nextoffset = 0; @@ -272,14 +294,27 @@ int fdt_next_node(const void *fdt, int offset, int *depth) break; case FDT_END_NODE: + /* IAMROOT20 20240511 + * (*depth) < 0 인 경우 + * -> 부모 노드로 올라가는 경우이므로, + * 여기서 nextoffset을 return하여 하위 노드만 돌도록 함 + * (부모 노드로 올라가지 못하도록) + */ if (depth && 
((--(*depth)) < 0)) return nextoffset; break; case FDT_END: + /* IAMROOT20 20240511 + * fdt 끝(END) 인 경우 또는 + * nextoffset == -FDT_ERR_TRUNCATED 인 경우 + */ if ((nextoffset >= 0) || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) return -FDT_ERR_NOTFOUND; + /* IAMROOT20 20240511 + * nextoffset == -FDT_ERR_BADSTRUCTURE 일 경우 + */ else return nextoffset; } From fd93e0f6e6cd5f40cba8390f98cdce8347e9ed46 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 18 May 2024 22:04:24 +0900 Subject: [PATCH 058/104] IAMROOT20 20240518 - Add comments Signed-off-by: Daero Lee --- drivers/of/fdt.c | 40 +++++++++++++++++++++++++++++++++++++ scripts/dtc/libfdt/fdt.c | 8 ++++++++ scripts/dtc/libfdt/fdt_ro.c | 33 +++++++++++++++++++++++++++++- 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 372cf20bbffa2..550890bd9a875 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -97,11 +97,18 @@ static bool of_fdt_device_is_available(const void *blob, unsigned long node) return false; } +/* IAMROOT20 20240518 + * 현재 *mem을 align에 맞춰 올림한 후 return + * *mem : *mem += size를 저장 + */ static void *unflatten_dt_alloc(void **mem, unsigned long size, unsigned long align) { void *res; + /* IAMROOT20 20240518 + * PTR_ALIGN(*mem, align) : *mem 값을 align 단위로 올림 + */ *mem = PTR_ALIGN(*mem, align); res = *mem; *mem += size; @@ -139,6 +146,10 @@ static void populate_properties(const void *blob, continue; } + /* IAMROOT20 20240518 + * 현재 node의 name property가 있다면, + * 아래 if (!has_name) 조건문에서 만들지 않는다 + */ if (!strcmp(pname, "name")) has_name = true; @@ -176,10 +187,18 @@ static void populate_properties(const void *blob, /* With version 0x10 we may not have the name property, * recreate it here from the unit name if absent */ + /* IAMROOT20 20240518 + * 현재 node의 name property를 만들어 연결한다 + */ if (!has_name) { const char *p = nodename, *ps = p, *pa = NULL; int len; + /* IAMROOT20 20240518 + * ex) p = "/soc/uart@7e201000" + * ^--- pa + * ^--- ps + */ while (*p) { if 
((*p) == '@') pa = p; @@ -217,6 +236,10 @@ static int populate_node(const void *blob, const char *pathp; int len; + /* IAMROOT20 20240518 + * len : node name의 길이 저장 + * pathp : node name의 가상 주소(offset 아님) + */ pathp = fdt_get_name(blob, offset, &len); if (!pathp) { *pnp = NULL; @@ -225,11 +248,19 @@ static int populate_node(const void *blob, len++; + /* IAMROOT20 20240518 + * ex) *mem = 7, len = 7, sizeof(struct device_node) = 208 + * -> *mem = (7 + 1(align 올림)) + 7 + 208 = 223 + * np = (7 + 1(align 올림) = 8 + */ np = unflatten_dt_alloc(mem, sizeof(struct device_node) + len, __alignof__(struct device_node)); if (!dryrun) { char *fn; of_node_init(np); + /* IAMROOT20 20240518 + * device_node 끝에 node name을 저장 + */ np->full_name = fn = ((char *)np) + sizeof(*np); memcpy(fn, pathp, len); @@ -241,6 +272,9 @@ static int populate_node(const void *blob, } } + /* IAMROOT20 20240518 + * node의 '모든 property' + 'node name property'를 추가 + */ populate_properties(blob, offset, mem, np, pathp, dryrun); if (!dryrun) { np->name = of_get_property(np, "name", NULL); @@ -297,6 +331,7 @@ static int unflatten_dt_nodes(const void *blob, void *base = mem; /* IAMROOT20 20240511 * first pass - 크기 계산 -> dryrun = true + * second pass -> dryrun = false */ bool dryrun = !base; int ret; @@ -331,6 +366,7 @@ static int unflatten_dt_nodes(const void *blob, continue; /* IAMROOT20_END 20240511 */ + /* IAMROOT20_START 20240518 */ ret = populate_node(blob, offset, &mem, nps[depth], &nps[depth+1], dryrun); if (ret < 0) @@ -354,6 +390,9 @@ static int unflatten_dt_nodes(const void *blob, if (!dryrun) reverse_nodes(root); + /* IAMROOT20 20240518 + * first pass : (mem - base) 로 필요한 크기를 구함 + */ return mem - base; } @@ -1422,6 +1461,7 @@ void __init unflatten_device_tree(void) { __unflatten_device_tree(initial_boot_params, NULL, &of_root, early_init_dt_alloc_memory_arch, false); + /* IAMROOT20_END 20240518 */ /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */ 
of_alias_scan(early_init_dt_alloc_memory_arch); diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c index fd4f671390974..aebacc3fe1cb2 100644 --- a/scripts/dtc/libfdt/fdt.c +++ b/scripts/dtc/libfdt/fdt.c @@ -239,6 +239,10 @@ uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) return tag; } +/* IAMROOT20 20240518 + * offset을 그대로 return + * - offset이 유효한지 확인, 현재 offset의 tag가 FDT_BEGIN_NODE인 지 확인 + */ int fdt_check_node_offset_(const void *fdt, int offset) { if (!can_assume(VALID_INPUT) @@ -255,6 +259,10 @@ int fdt_check_node_offset_(const void *fdt, int offset) return offset; } +/* IAMROOT20 20240518 + * offset을 그대로 return + * - offset이 유효한지 확인, 현재 offset의 tag가 FDT_PROP인지 확인 + */ int fdt_check_prop_offset_(const void *fdt, int offset) { if (!can_assume(VALID_INPUT) diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c index c1dfaadc94f9a..5f4f1b41b8421 100644 --- a/scripts/dtc/libfdt/fdt_ro.c +++ b/scripts/dtc/libfdt/fdt_ro.c @@ -31,6 +31,10 @@ static int fdt_nodename_eq_(const void *fdt, int offset, return 0; } +/* IAMROOT20 20240518 + * string block 안에서 stroffset이 가리키는 위치의 string(name)을 return + * *lenp : name의 길이를 저장 + */ const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) { int32_t totalsize; @@ -52,6 +56,10 @@ const char *fdt_get_string(const void *fdt, int stroffset, int *lenp) goto fail; err = -FDT_ERR_BADOFFSET; + /* IAMROOT20 20240518 + * absoffset : fdt 안에서 stroffset이 가리키는 위치까지 offset + * - stroffset : string block 안에서의 offset + */ absoffset = stroffset + fdt_off_dt_strings(fdt); if (absoffset >= (unsigned)totalsize) goto fail; @@ -201,6 +209,9 @@ int fdt_num_mem_rsv(const void *fdt) return -FDT_ERR_TRUNCATED; } +/* IAMROOT20 20240518 + * 현재 offset 위치에서 다음 FDT_PROP tag의 offset을 return + */ static int nextprop_(const void *fdt, int offset) { uint32_t tag; @@ -277,7 +288,7 @@ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) const char *q; while (*p == '/') { - p++; 
+ p++; if (p == end) return offset; } @@ -300,6 +311,10 @@ int fdt_path_offset(const void *fdt, const char *path) return fdt_path_offset_namelen(fdt, path, strlen(path)); } +/* IAMROOT20 20240518 + * 현재 nodeoffset 위치의 node name 주소를 return + * *len : node name의 길이를 저장 + */ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) { const struct fdt_node_header *nh = fdt_offset_ptr_(fdt, nodeoffset); @@ -318,6 +333,10 @@ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) * give only the leaf name (after all /). The actual tree * contents are loosely checked. */ + /* IAMROOT20 20240518 + * ex) nameptr = "/memory/gpu" + * ^-- leaf + */ const char *leaf; leaf = strrchr(nameptr, '/'); if (leaf == NULL) { @@ -356,6 +375,10 @@ int fdt_next_property_offset(const void *fdt, int offset) return nextprop_(fdt, offset); } +/* IAMROOT20 20240518 + * offset이 가리키는 위치의 fdt_property 포인터 return + * *lenp : property value 길이 저장 + */ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, int offset, int *lenp) @@ -370,6 +393,9 @@ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt, return NULL; } + /* IAMROOT20 20240518 + * prop : offset이 가리키는 위치의 가상 '주소' + */ prop = fdt_offset_ptr_(fdt, offset); if (lenp) @@ -469,6 +495,11 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, return prop->data; } +/* IAMROOT20 20240518 + * offset이 가리키는 property value(prop->data)를 return + * *namep : property name 저장 + * *lenp : property value 길이 저장 + */ const void *fdt_getprop_by_offset(const void *fdt, int offset, const char **namep, int *lenp) { From e358e0e20c0b335ce264f896a7965e8403b92a74 Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 19 May 2024 21:23:07 +0900 Subject: [PATCH 059/104] =?UTF-8?q?IAMROOT20=20unflatten=5Fdt=5Fnodes=20?= =?UTF-8?q?=EC=A3=BC=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drivers/of/fdt.c | 15 +++++++++++++++
include/linux/align.h | 5 +++++ include/uapi/linux/const.h | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 372cf20bbffa2..e9817c7895b34 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -290,6 +290,14 @@ static int unflatten_dt_nodes(const void *blob, struct device_node *dad, struct device_node **nodepp) { + /* IAMROOT20 20240515 + * First pass + * unflatten_dt_nodes(initial_boot_params, NULL, NULL, NULL); + */ + /* IAMROOT20 20240518 + * Second pass + * unflatten_dt_nodes(initial_boot_params, mem, NULL, &of_root) + */ struct device_node *root; int offset = 0, depth = 0, initial_depth = 0; #define FDT_MAX_DEPTH 64 @@ -379,6 +387,10 @@ void *__unflatten_device_tree(const void *blob, void *(*dt_alloc)(u64 size, u64 align), bool detached) { + /* + *__unflatten_device_tree(initial_boot_params, NULL, &of_root, + * early_init_dt_alloc_memory_arch, false); + */ int size; void *mem; int ret; @@ -422,6 +434,9 @@ void *__unflatten_device_tree(const void *blob, pr_debug(" unflattening %p...\n", mem); + /* IAMROOT20 20240518 + * unflatten_dt_nodes( initial_boot_params, mem, NULL, &of_root) + */ /* Second pass, do actual unflattening */ ret = unflatten_dt_nodes(blob, mem, dad, mynodes); diff --git a/include/linux/align.h b/include/linux/align.h index 2b4acec7b95a2..d3456c152675a 100644 --- a/include/linux/align.h +++ b/include/linux/align.h @@ -5,6 +5,11 @@ #include /* @a is a power of 2 value */ +/* IAMROOT20 20240518 + * ALIGN(14, 8) + * -> (14+7) & ~7 = 21 & 0xffff_..._fff8 + * -> 16 + */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index 0e22fbd3898af..eb677f8106911 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,6 +28,12 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) 
+/* IAMROOT20 20240518 + * __ALIGN_KERNEL(14, 8) + * -> __ALIGN_KERNEL_MASK(14, 7) + * -> (14+7) & ~7 = 21 & 0xffff_..._fff8 + * -> 16 + */ #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) From ffe676cb6f9842f5e6964a1e7d33b2d5ed0f75bf Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 25 May 2024 21:59:25 +0900 Subject: [PATCH 060/104] IAMROOT20 - Add comments Signed-off-by: Daero Lee --- drivers/base/arch_numa.c | 10 +++++ drivers/of/base.c | 92 ++++++++++++++++++++++++++++++++++++++++ drivers/of/fdt.c | 1 + drivers/of/of_numa.c | 20 +++++++++ drivers/of/unittest.c | 5 +++ include/linux/bitmap.h | 27 ++++++++++++ include/linux/nodemask.h | 9 ++++ include/linux/numa.h | 4 ++ include/linux/string.h | 6 +++ lib/string.c | 17 ++++++++ 10 files changed, 191 insertions(+) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567d1ec..13eef9dbb90ac 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -17,6 +17,9 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); +/* IAMROOT20 20240525 + * numa_nodes_parsed = { bits[1] } + */ nodemask_t numa_nodes_parsed __initdata; static int cpu_to_node_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE }; @@ -275,6 +278,13 @@ static int __init numa_alloc_distance(void) size_t size; int i, j; + /* IAMROOT20 20240525 + * ex) MAX_NUMNODES > 1 이면 + * - nr_node_ids = MAX_NUMNODES = 16 + * -> size = 16 * 16 * 1(u8) = 256 + * + * numa_distance : 16 x 16 배열, 원소 하나는 1byte + */ size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); numa_distance = memblock_alloc(size, PAGE_SIZE); if (WARN_ON(!numa_distance)) diff --git a/drivers/of/base.c b/drivers/of/base.c index 166fb7d753378..c72457d3254be 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -55,6 +55,12 @@ DEFINE_MUTEX(of_mutex); */ DEFINE_RAW_SPINLOCK(devtree_lock); +/* IAMROOT20 20240525 + * ex) np = 'cpu0: cpu@10000' 노드, name = "cpu" + * + * node_name = "cpu@10000" + * len = 3 + */ bool of_node_name_eq(const struct device_node *np, const char *name) { const char *node_name; @@ -644,6 +650,30 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev) unsigned long flags; struct device_node *node; + /* IAMROOT20 20240525 + * ex) + * cpus { + * #address-cells = <1>; + * #size-cells = <0>; + * + * cpu0: cpu@10000 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10000>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * numa-node-id = <0>; + * }; + * + * cpu1: cpu@10001 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10001>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * numa-node-id = <0>; + * }; + */ if (!prev) node = of_find_node_by_path("/cpus"); @@ -657,6 +687,10 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev) for (; next; next = next->sibling) { if (__of_device_is_fail(next)) continue; + /* IAMROOT20 20240525 + * node name이 "cpu"로 시작하는 지 확인 - ex) "cpu@10000" + * device_type = "cpu" 인지 확인 + */ if (!(of_node_name_eq(next, "cpu") || __of_node_is_type(next, "cpu"))) continue; @@ -717,6 +751,12 @@ struct device_node *of_get_child_by_name(const struct 
device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); +/* IAMROOT20 20240525 + * parent 노드의 모든 child의 마지막 path가 path와 일치하는 child를 return + * ex) parent : of_root("/"), path : "aliases" + * child : "/soc", "/chosen", "/aliases" + * -> "/aliases" 노드를 return + */ struct device_node *__of_find_node_by_path(struct device_node *parent, const char *path) { @@ -747,6 +787,14 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, node = __of_find_node_by_path(node, path); of_node_put(tmp); path = strchrnul(path, '/'); + /* IAMROOT20 20240525 + * separator가 NULL이 아닌 경우 while문 탈출 조건 + * ex) path = "/foo/bar:bao" + * ^---- separator + * 1-round) ^--- path + * 2-round) ^--- path + * : separator < path -> break + */ if (separator && separator < path) break; } @@ -771,6 +819,10 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. */ +/* IAMROOT20 20240525 + * ex) path = "i2c2_pins_a: i2c2" + * - opts가 null이 아니면 *opts에 ':' 다음 위치를 저장 + */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; @@ -781,9 +833,23 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt if (opts) *opts = separator ? 
separator + 1 : NULL; + /* IAMROOT20 20240525 + * path = "/" : root path인 경우 of_root를 return + */ if (strcmp(path, "/") == 0) return of_node_get(of_root); + /* IAMROOT20 20240525 + * aliases를 사용하는 path의 경우 + * ex1) path = "foo" + * ^----p + * ex2) path = "foo/bar" + * ^----p + * len : aliases의 길이 + * + * np : aliases가 가리키는 노드 ex) foo aliases의 노드 + * path : p + */ /* The path could begin with an alias */ if (*path != '/') { int len; @@ -1729,11 +1795,22 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = NULL; + /* IAMROOT20 20240525 + * ex) chosen { + * stdout-path = "/pl011@9000000"; + * }; + * + * -> name = "/pl011@9000000" + */ if (of_property_read_string(of_chosen, "stdout-path", &name)) of_property_read_string(of_chosen, "linux,stdout-path", &name); if (IS_ENABLED(CONFIG_PPC) && !name) of_property_read_string(of_aliases, "stdout", &name); + /* IAMROOT20 20240525 + * name으로 of_stdout node를 찾는다 + * - ':'이 name에 있으면(옵션 문자열), of_stdout_options에 저장 + */ if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) @@ -1743,6 +1820,11 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) if (!of_aliases) return; + /* IAMROOT20 20240525 + * of_aliases 노드의 property를 순회하면서 alias_prop 구조체를 만들어 + * aliases_lookup 리스트에 추가한다 + * - 나중에 aliases가 나오면 노드, name 등을 빠르게 찾기 위해 + */ for_each_property_of_node(of_aliases, pp) { const char *start = pp->name; const char *end = start + strlen(start); @@ -1760,6 +1842,16 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) if (!np) continue; + /* IAMROOT20 20240525 + * aliases { + * serial0 = &uart0; + * ^----------- start = alias = "serial0" + * ^---- end 1 + * ^----- end 2 + * <> id = 0 + * <-----> stem = "serial" + * }; + */ /* walk the alias backwards to extract the id and work out * the 'stem' string */ while (isdigit(*(end-1)) && end > start) diff --git a/drivers/of/fdt.c 
b/drivers/of/fdt.c index 550890bd9a875..7b655fd275c73 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1463,6 +1463,7 @@ void __init unflatten_device_tree(void) early_init_dt_alloc_memory_arch, false); /* IAMROOT20_END 20240518 */ + /* IAMROOT20_START 20240525 */ /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */ of_alias_scan(early_init_dt_alloc_memory_arch); diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 5949829a1b001..ee894ee64e287 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -27,6 +27,16 @@ static void __init of_numa_parse_cpu_nodes(void) struct device_node *np; for_each_of_cpu_node(np) { + /* IAMROOT20 20240525 + * ex) cpu1: cpu@10001 { + * device_type = "cpu"; + * compatible = "arm,cortex-a72"; + * reg = <0x10001>; + * enable-method = "psci"; + * next-level-cache = <&cluster0_l2>; + * ------> numa-node-id = <0>; + * }; + */ r = of_property_read_u32(np, "numa-node-id", &nid); if (r) continue; @@ -46,6 +56,15 @@ static int __init of_numa_parse_memory_nodes(void) u32 nid; int i, r; + /* IAMROOT20 20240525 + * ex) memory@0 { + * device_type = "memory"; + * reg = <0x0 0x00000000 0x0 0x40000000>; + * numa-node-id = <0>; + * }; + * device_type = "memory"인 모든 노드를 순회 + * - 'numa-node-id' property 값을 nid에 저장 + */ for_each_node_by_type(np, "memory") { r = of_property_read_u32(np, "numa-node-id", &nid); if (r == -EINVAL) @@ -61,6 +80,7 @@ static int __init of_numa_parse_memory_nodes(void) r = -EINVAL; } + /* IAMROOT20_END 20240525 */ for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 2191c01365317..1c003d0400ab9 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -3405,6 +3405,11 @@ static void * __init dt_alloc_memory(u64 size, u64 align) * * Have to stop before resolving phandles, because that uses kmalloc. 
*/ +/* IAMROOT20 20240525 + * dtb overlay unittest를 위한 dt tree를 생성하여 &overlay_base_root에 저장 + * - overlay에 대한 dtbo는 scripts/Makefile.lib에서 생성 + * -> 정확하게 어떤 부분에서 생성하는 지는 모름... + */ void __init unittest_unflatten_overlay_base(void) { struct overlay_info *info; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7d6d73b781472..c14b282ca2fbf 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -234,7 +234,34 @@ extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count); +/* IAMROOT20 20240525 + * exam) start = 1 --> 0xffff_ffff_ffff_fffe + * exam) start = 2 --> 0xffff_ffff_ffff_fffc + * exam) start = 3 --> 0xffff_ffff_ffff_fff8 + * exam) start = 4 --> 0xffff_ffff_ffff_fff0 + * exam) start = 16 --> 0xffff_ffff_ffff_0000 + */ #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +/* IAMROOT20 20240525 + * BITMAP_LAST_WORD_MASK(nbits) : 0번째 bit 부터 (nbits-1) 번째 bit 까지 1로 set + * 0b 0000...000111...111 + * ^--- 0 번째 + * ^--- (nbits - 1) 번째 + * + * exam) nbits = 16 + * ~0UL >> (-16 & 63) + * = ~0UL >> (0xffff_ffff_ffff_fff0 & 0x3f) + * = ~0UL >> 0x30 + * = ~0UL >> 48 + * = 0x0000_0000_0000_ffff + * + * exam) nbits = 1 --> 0x0000_0000_0000_0001 + * exam) nbits = 2 --> 0x0000_0000_0000_0003 + * exam) nbits = 3 --> 0x0000_0000_0000_0007 + * exam) nbits = 17 --> 0x0000_0000_0001_ffff + * exam) nbits = 30 --> 0x0000_0000_3fff_ffff + * exam) nbits = 32 --> 0x0000_0000_ffff_ffff + */ #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index bb0ee80526b2d..5e5e8c89cac79 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -96,6 +96,11 @@ #include #include +/* IAMROOT20 20240525 + * MAX_NUMNODES = 1<<4 = 16 + * + * 
bits[BITS_TO_LONGS(MAX_NUMNODES)] = bits[1] + */ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; @@ -314,6 +319,10 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #if MAX_NUMNODES <= BITS_PER_LONG +/* IAMROOT20 20240525 + * ex) MAX_NUMNODES = 16 인 경우 + * -> NODE_MASK_ALL = {{ [0] = 0xffff }} + */ #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051fa..83ecaa3f77ba5 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -9,6 +9,10 @@ #define NODES_SHIFT 0 #endif +/* IAMROOT20 20240525 + * NODES_SHIFT = CONFIG_NODES_SHIFT = 4 + * MAX_NUMNODES = 1<<4 = 16 + */ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) diff --git a/include/linux/string.h b/include/linux/string.h index c062c581a98b9..5a1d28ddc2c99 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -244,6 +244,12 @@ static inline void memzero_explicit(void *s, size_t count) * * @path: path to extract the filename from. */ +/* IAMROOT20 20240525 + * ex1) path = "foo/bar" + * ^---- tail, return : tail + 1 + * ex2) path = "foo" + * ^---- return + */ static inline const char *kbasename(const char *path) { const char *tail = strrchr(path, '/'); diff --git a/lib/string.c b/lib/string.c index 3d55ef8901068..277d07a0e214a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -324,6 +324,12 @@ EXPORT_SYMBOL(strncmp); * Note that the %NUL-terminator is considered part of the string, and can * be searched for. */ +/* IAMROOT20 20240525 + * ex1) s = "foo/bar", c = '/' + * ^---- s : return + * ex2) s = "foo/bar", c = ':' + * - return NULL + */ char *strchr(const char *s, int c) { for (; *s != (char)c; ++s) @@ -343,6 +349,12 @@ EXPORT_SYMBOL(strchr); * Returns pointer to first occurrence of 'c' in s. If c is not found, then * return a pointer to the null byte at the end of s. 
*/ +/* IAMROOT20 20240525 + * ex) s = "foo/bar", c = '/' + * ^----s : return + * ex) s = "foo/bar", c = ':' + * ^----s : return + */ char *strchrnul(const char *s, int c) { while (*s && *s != (char)c) @@ -459,6 +471,11 @@ EXPORT_SYMBOL(strspn); * @s: The string to be searched * @reject: The string to avoid */ +/* IAMROOT20 20240525 + * ex) s = "foo/bar", reject = "/:" + * ^----p + * return : p-s = 3 + */ size_t strcspn(const char *s, const char *reject) { const char *p; From cd55e2852dada52d4486665ecf35bcd22028a6b7 Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 26 May 2024 20:37:36 +0900 Subject: [PATCH 061/104] =?UTF-8?q?IAMROOT20=20numa=5Finit=20=EC=A3=BC?= =?UTF-8?q?=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drivers/base/arch_numa.c | 27 ++++++++++++++++++++++++++ drivers/of/base.c | 42 +++++++++++++++++++++++++++++++++++++++- drivers/of/fdt.c | 2 +- drivers/of/of_numa.c | 3 +++ include/linux/bitmap.h | 19 ++++++++++++++++++ include/linux/nodemask.h | 3 +++ include/linux/numa.h | 3 +++ mm/page_alloc.c | 3 +++ 8 files changed, 100 insertions(+), 2 deletions(-) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index eaa31e567d1ec..0a27567560eaa 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -17,10 +17,25 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); +/* IAMROOT20 20240525 + * numa_init + * nodes_clear(numa_nodes_parsed) + */ nodemask_t numa_nodes_parsed __initdata; static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +/* IAMROOT20 20240525 + * numa_distance_cnt = 16 + */ static int numa_distance_cnt; +/* IAMROOT20 20240525 + * 16x16 2차원 행렬을 만든다.
numa_alloc_distance + * { { 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * { 20, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * { 20, 20, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + * ... + * { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 10} } + */ static u8 *numa_distance; bool numa_off; @@ -275,7 +290,13 @@ static int __init numa_alloc_distance(void) size_t size; int i, j; + /* IAMROOT20 20240525 + * size = 16 * 16 * 1 = 256 + */ size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]); + /* IAMROOT20 20240525 + * 16x16 2차원 행렬을 만든다. + */ numa_distance = memblock_alloc(size, PAGE_SIZE); if (WARN_ON(!numa_distance)) return -ENOMEM; @@ -373,6 +394,12 @@ static int __init numa_register_nodes(void) return 0; } +/* IAMROOT20 20240525 + * __init arch_numa_init + * acpi_enable numa_init(arch_acpi_numa_init) + * acpi_disabled numa_init(of_numa_init) + * etc numa_init(dummy_numa_init) + */ static int __init numa_init(int (*init_func)(void)) { int ret; diff --git a/drivers/of/base.c b/drivers/of/base.c index 166fb7d753378..42cb67404332b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -717,6 +717,9 @@ struct device_node *of_get_child_by_name(const struct device_node *node, } EXPORT_SYMBOL(of_get_child_by_name); +/* IAMROOT20 20240525 + * __of_find_node_by_path(of_root, "aliases"); + */ struct device_node *__of_find_node_by_path(struct device_node *parent, const char *path) { @@ -735,6 +738,9 @@ struct device_node *__of_find_node_by_path(struct device_node *parent, return NULL; } +/* IAMROOT20 20240525 + * __of_find_node_by_full_path(of_root, "/aliases"); + */ struct device_node *__of_find_node_by_full_path(struct device_node *node, const char *path) { @@ -771,13 +777,20 @@ struct device_node *__of_find_node_by_full_path(struct device_node *node, * Return: A node pointer with refcount incremented, use * of_node_put() on it when done.
*/ +/* IAMROOT20 20240525 + * of_find_node_by_path + * of_aliases = of_find_node_by_path("/aliases"); + * of_find_node_opts_by_path("/aliases", NULL); + * + * of_alias_scan + * of_find_node_opts_by_path("/pl011@9000000", &of_stdout_options); + */ struct device_node *of_find_node_opts_by_path(const char *path, const char **opts) { struct device_node *np = NULL; struct property *pp; unsigned long flags; const char *separator = strchr(path, ':'); - if (opts) *opts = separator ? separator + 1 : NULL; @@ -1729,11 +1742,30 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = NULL; + /* IAMROOT20 20240525 + * exam) + * chosen { + * stdout-path = "/pl011@9000000"; + * }; + * name = "/pl011@9000000" + */ if (of_property_read_string(of_chosen, "stdout-path", &name)) of_property_read_string(of_chosen, "linux,stdout-path", &name); if (IS_ENABLED(CONFIG_PPC) && !name) of_property_read_string(of_aliases, "stdout", &name); + /* IAMROOT20 20240525 + * exam) name="/pl011@9000000" + * of_stdout = + * pl011@9000000 { + * clock-names = "uartclk\0apb_pclk"; + * clocks = <0x8000 0x8000>; + * interrupts = <0x00 0x01 0x04>; + * reg = <0x00 0x9000000 0x00 0x1000>; + * compatible = "arm,pl011\0arm,primecell"; + * }; + * of_stdout_options = NULL + */ if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) @@ -1756,6 +1788,14 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) !strcmp(pp->name, "linux,phandle")) continue; + /* IAMROOT20 20240525 + * exam) + * aliases { + * serial0 = &uart0; + * }; + * name = "serial0" + * value=&uart0 + */ np = of_find_node_by_path(pp->value); if (!np) continue; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index c8de33209182b..fbbdad62f6f8a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -427,7 +427,7 @@ void *__unflatten_device_tree(const void *blob, bool detached) { /* -
*__unflatten_device_tree(initial_boot_params, NULL, &of_root, + * __unflatten_device_tree(initial_boot_params, NULL, &of_root, * early_init_dt_alloc_memory_arch, false); */ int size; diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 5949829a1b001..2fa902acb1754 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -26,6 +26,9 @@ static void __init of_numa_parse_cpu_nodes(void) int r; struct device_node *np; + /* IAMROOT20 20240525 + * hisilicon/hip07.dtsi 참고 + */ for_each_of_cpu_node(np) { r = of_property_read_u32(np, "numa-node-id", &nid); if (r) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 7d6d73b781472..acc0f986cbdb9 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -234,7 +234,26 @@ extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, loff_t off, size_t count); +/* IAMROOT20 20240525 + * exam) start = 1 --> 0xffff_ffff_ffff_fffe + * exam) start = 2 --> 0xffff_ffff_ffff_fffc + * exam) start = 3 --> 0xffff_ffff_ffff_fff8 + * exam) start = 4 --> 0xffff_ffff_ffff_fff0 + * exam) start = 16 --> 0xffff_ffff_ffff_0000 + */ #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +/* IAMROOT20 20240525 + * exam) nbits = 16 + * ~0UL >> (-16 & 63) + * = ~0UL >> (0xffff_ffff_ffff_fff0 & 0x3f) + * = ~0UL >> 0x30 + * = ~0UL >> 48 + * = 0x0000_0000_0000_ffff + * + * exam) nbits = 17 --> 0x0000_0000_0001_ffff + * exam) nbits = 30 --> 0x0000_0000_3fff_ffff + * exam) nbits = 32 --> 0x0000_0000_ffff_ffff + */ #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index bb0ee80526b2d..94a4f26cfbd46 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -96,6 +96,9 @@ #include #include +/* IAMROOT20 20240525 + 
* MAX_NUMNODES 1 << 4 16 + */ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; diff --git a/include/linux/numa.h b/include/linux/numa.h index 59df211d051fa..0af70c40b7100 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -9,6 +9,9 @@ #define NODES_SHIFT 0 #endif +/* IAMROOT20 20240525 + * MAX_NUMNODES = 16 + */ #define MAX_NUMNODES (1 << NODES_SHIFT) #define NUMA_NO_NODE (-1) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 47421bedc12b7..9a8045040a562 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -381,6 +381,9 @@ int movable_zone; EXPORT_SYMBOL(movable_zone); #if MAX_NUMNODES > 1 +/* IAMROOT20 20240525 + * nr_node_ids = 16 + */ unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; unsigned int nr_online_nodes __read_mostly = 1; EXPORT_SYMBOL(nr_node_ids); From 0c27279a8d5045916ea8a87b730124e0452ded77 Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 2 Jun 2024 10:16:19 +0900 Subject: [PATCH 062/104] =?UTF-8?q?IAMROOT20=20numa=5Finit=20=EC=A3=BC?= =?UTF-8?q?=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arch/arm64/mm/hugetlbpage.c | 3 +++ drivers/base/arch_numa.c | 17 +++++++++++++++++ drivers/of/of_numa.c | 23 +++++++++++++++++++++++ include/linux/mmzone.h | 3 +++ include/linux/nodemask.h | 8 ++++++++ 5 files changed, 54 insertions(+) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 95364e8bdc194..cef435fdd090e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,6 +41,9 @@ void __init arm64_hugetlb_cma_reserve(void) int order; if (pud_sect_supported()) + /* IAMROOT20 20240601 + * 30 - 12 = 18 + */ order = PUD_SHIFT - PAGE_SHIFT; else order = CONT_PMD_SHIFT - PAGE_SHIFT; diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index c73c4cad9351d..8399d25276176 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -38,6 
+38,23 @@ static int numa_distance_cnt; * ... * { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 10} } */ +/* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + * { { 10, 15, 20, 25, 20, ..., 20}, + * { 15, 10, 25, 30, 20, ..., 20}, + * { 20, 25, 10, 15, 20, ..., 20}, + * { 25, 30, 15, 10, 20, ..., 20}, + * ... + * { 20, 20, 20, 20, 20, ..., 10} } + */ static u8 *numa_distance; bool numa_off; diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 4e39150e257d8..359333571c9a6 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -84,6 +84,7 @@ static int __init of_numa_parse_memory_nodes(void) } /* IAMROOT20_END 20240525 */ + // TODO: for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); @@ -99,6 +100,17 @@ static int __init of_numa_parse_memory_nodes(void) static int __init of_numa_parse_distance_map_v1(struct device_node *map) { + /* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + */ const __be32 *matrix; int entry_count; int i; @@ -151,6 +163,17 @@ static int __init of_numa_parse_distance_map(void) int ret = 0; struct device_node *np; + /* IAMROOT20 20240601 + * hisilicon/hip07-d05.dts + * distance-map { + * compatible = "numa-distance-map-v1"; + * distance-matrix = + * <0 0 10>, <0 1 15>, <0 2 20>, <0 3 25>, + * <1 0 15>, <1 1 10>, <1 2 25>, <1 3 30>, + * <2 0 20>, <2 1 25>, <2 2 10>, <2 3 15>, + * <3 0 25>, <3 1 30>, <3 2 15>, <3 3 10>; + * }; + */ np = 
of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); if (np) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a4889c9d4055b..a4bf4d56b5f61 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -28,6 +28,9 @@ #ifndef CONFIG_ARCH_FORCE_MAX_ORDER #define MAX_ORDER 10 #else +/* IAMROOT20 20240601 + * MAX_ORDER = 10 + */ #define MAX_ORDER CONFIG_ARCH_FORCE_MAX_ORDER #endif #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 5e5e8c89cac79..f35150cf0565e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -534,7 +534,15 @@ static inline int node_random(const nodemask_t *maskp) #endif } +/* IAMROOT20 20240525 + * numa_init + * nodes_clear(node_online_map); + */ #define node_online_map node_states[N_ONLINE] +/* IAMROOT20 20240525 + * numa_init + * nodes_clear(node_possible_map); + */ #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) From 712ce7376fa4e7575c46975707644ed7bb7bc4df Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 8 Jun 2024 13:14:44 +0000 Subject: [PATCH 063/104] IAMROOT20 add comments to address.c, of_numa.c, property.c, atomic.h --- drivers/of/address.c | 47 +++++++++++++++++++++++++++++ drivers/of/of_numa.c | 4 +++ drivers/of/property.c | 4 +++ include/asm-generic/bitops/atomic.h | 4 +++ 4 files changed, 59 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index e692809ff8227..12dc2f49194e0 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -342,6 +342,38 @@ static int of_bus_default_flags_match(struct device_node *np) /* * Array of bus specific translators */ +/* IAMROOT20 20240608 + * https://github.com/rockchip-toybrick/u-boot/blob/master/common/fdt_support.c + * + * struct of_bus - Callbacks for bus specific translators + * @name: A string used to identify this bus in debug output. 
+ * @addresses: The name of the DT property from which addresses are + * to be read, typically "reg". + * @match: Return non-zero if the node whose parent is at + * parentoffset in the FDT blob corresponds to a bus + * of this type, otherwise return zero. If NULL a match + * is assumed. + * @count_cells:Count how many cells (be32 values) a node whose parent + * is at parentoffset in the FDT blob will require to + * represent its address (written to *addrc) & size + * (written to *sizec). + * @map: Map the address addr from the address space of this + * bus to that of its parent, making use of the ranges + * read from DT to an array at range. na and ns are the + * number of cells (be32 values) used to hold and address + * or size, respectively, for this bus. pna is the number + * of cells used to hold an address for the parent bus. + * Returns the address in the address space of the parent + * bus. + * @translate: Update the value of the address cells at addr within an + * FDT by adding offset to it. na specifies the number of + * cells used to hold the address being translated. Returns + * zero on success, non-zero on error. + * + * Each bus type will include a struct of_bus in the of_busses array, + * providing implementations of some or all of the functions used to + * match the bus & handle address translation for its children. + */ static struct of_bus of_busses[] = { #ifdef CONFIG_PCI @@ -522,6 +554,9 @@ static u64 __of_translate_address(struct device_node *dev, parent = get_parent(dev); if (parent == NULL) goto bail; + /* IAMROOT20 20240608 + * bus에 NULL check 필요? 
+ */ bus = of_match_bus(parent); /* Count address cells & copy address locally */ @@ -530,6 +565,9 @@ static u64 __of_translate_address(struct device_node *dev, pr_debug("Bad cell count for %pOF\n", dev); goto bail; } + /* IAMROOT20 20240608 + * addr에 in_addr(reg 노드의 property)의 4바이트 단위로 복사 + */ memcpy(addr, in_addr, na * 4); pr_debug("bus is %s (na=%d, ns=%d) on %pOF\n", @@ -599,6 +637,7 @@ u64 of_translate_address(struct device_node *dev, const __be32 *in_addr) struct device_node *host; u64 ret; + /* IAMROOT20_END 20240608 */ ret = __of_translate_address(dev, of_get_parent, in_addr, "ranges", &host); if (host) { @@ -727,12 +766,20 @@ const __be32 *__of_get_address(struct device_node *dev, int index, int bar_no, return NULL; psize /= 4; + /* IAMROOT20 20240608 + * reg = <0x0 0x00000000 0x0 0x40000000>; + * na = 2, ns = 2 + */ onesize = na + ns; for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) { u32 val = be32_to_cpu(prop[0]); /* PCI bus matches on BAR number instead of index */ if (((bar_no >= 0) && ((val & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0))) || ((index >= 0) && (i == index))) { + /* IAMROOT20 20240608 + * *size = 0x0000_0000_4000_0000; + * *flags = 0x0000_0200 = IORESOURCE_MEM; + */ if (size) *size = of_read_number(prop + na, ns); if (flags) diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 359333571c9a6..ad512bde6e6db 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -48,6 +48,9 @@ static void __init of_numa_parse_cpu_nodes(void) if (nid >= MAX_NUMNODES) pr_warn("Node id %u exceeds maximum value\n", nid); else + /* IAMROOT20 20240608 + * numa_nodes_parsed의 bits의 nid번째 비트 필드를 1로 설정 + */ node_set(nid, numa_nodes_parsed); } } @@ -225,6 +228,7 @@ int __init of_numa_init(void) int r; of_numa_parse_cpu_nodes(); + /* IAMROOT20_START 20240608 */ r = of_numa_parse_memory_nodes(); if (r) return r; diff --git a/drivers/of/property.c b/drivers/of/property.c index ddc75cd50825e..6e7b3ae702ecf 100644 --- 
a/drivers/of/property.c +++ b/drivers/of/property.c @@ -519,6 +519,10 @@ int of_property_read_string_helper(const struct device_node *np, l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; + /* IAMROOT20 20240608 + * out_strs에 문자열 p의 주소를 복사 + * - 문자열을 복사하는게 아님 + */ if (out_strs && i >= skip) *out_strs++ = p; } diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 90a7fe636785f..fbbadb264495d 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -15,6 +15,10 @@ static __always_inline void arch_set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); + /* IAMROOT20 20240608 + * nr번째 비트 필드의 값을 1로 세팅한다 + * *p |= BIT_MASK(nr) + */ arch_atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } From a67ce29b2d23eb076f33cee94f40610c3b907d2c Mon Sep 17 00:00:00 2001 From: fehead Date: Mon, 10 Jun 2024 21:17:02 +0900 Subject: [PATCH 064/104] =?UTF-8?q?IAMROOT20=20hugetlb=5Fcma=5Freserve=20?= =?UTF-8?q?=EC=A3=BC=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- arch/arm64/include/asm/pgtable.h | 10 +++++----- arch/arm64/kvm/pkvm.c | 3 +++ drivers/of/of_numa.c | 9 ++++++++- mm/cma.c | 13 +++++++++++++ mm/hugetlb.c | 19 +++++++++++++++++++ mm/memory.c | 4 ++++ 6 files changed, 52 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c3a75ecce0aa0..b41b92226f021 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -389,11 +389,11 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Hugetlb definitions. 
*/ /* IAMROOT20 20240406 - * ex) VA_BITS : 39, page size : 4KB - * - HPAGE_SHIFT PMD_SHIFT(21) - * - HPAGE_SIZE (1 << 21) - * - HPAGE_MASK ~((1 << 21) - 1) - * - HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) = 21 - 12 = 9 + * ex) VA_BITS : 48, page size : 4KB + * - HPAGE_SHIFT 21 PMD_SHIFT + * - HPAGE_SIZE SIZE_2M (1 << 21) + * - HPAGE_MASK 0xffff_ffff_ffe0_0000 ~((1 << 21) - 1) + * - HUGETLB_PAGE_ORDER 9 (21-12) */ #define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 6e9ece1ebbe72..0c356e7e83842 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -56,6 +56,9 @@ static int __init register_memblock_regions(void) return 0; } +/* IAMROOT20 20240607 + * TODO: + */ void __init kvm_hyp_reserve(void) { u64 hyp_mem_pages = 0; diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index 359333571c9a6..d3b6c5d231396 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -20,6 +20,10 @@ * Even though we connect cpus to numa domains later in SMP * init, we need to know the node ids now for all cpus. */ +/* IAMROOT20 20240608 + * in : device tree cpu node + * out : 분석된 numa-id를 numa_nodes_parsed 비트맵에 셋팅. 
+ */ static void __init of_numa_parse_cpu_nodes(void) { u32 nid; @@ -52,6 +56,10 @@ static void __init of_numa_parse_cpu_nodes(void) } } +/* IAMROOT20 20240608 + * in : device tree memory node + * out : memblock memory영역 별로 numa-id 설정 + */ static int __init of_numa_parse_memory_nodes(void) { struct device_node *np = NULL; @@ -84,7 +92,6 @@ static int __init of_numa_parse_memory_nodes(void) } /* IAMROOT20_END 20240525 */ - // TODO: for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); diff --git a/mm/cma.c b/mm/cma.c index 6268d6620254f..ab711fbe4b4f4 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -171,6 +171,10 @@ void __init cma_reserve_pages_on_error(struct cma *cma) * * This function creates custom contiguous area from already reserved memory. */ +/* IAMROOT20 20240607 + * - size, align, reserved 영역 범위 검사등을 수행하고 cma 정보를 하나 얻고 + * 초기화한다. + */ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, @@ -235,6 +239,15 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. + */ +/* IAMROOT20 20240607 + * - cmdline or kernel config에 의한 cma 정보를 memblock reserved 영역을 할당하고 + * @res_cma(struct cma)에 등록한다. 
+ * + * res = cma_declare_contiguous_nid(0, size, 0, + * PAGE_SIZE << HUGETLB_PAGE_ORDER, // SIZE_2M + * 0, false, name, // name="hugetlb[0-16]" + * &hugetlb_cma[nid], nid); */ int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f791076da157c..2fd65ff8c12e3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7489,6 +7489,15 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; +/* IAMROOT20 20240607 + * cmdline에 hugetlb_cma=7G,0:1G,1:2G,2:1G,3:1G 일때 + * + * hugetlb_cma_size = 7*SIZE_1G + * hugetlb_cma_size_in_node[0] = 1*SIZE_1G + * hugetlb_cma_size_in_node[1] = 2*SIZE_1G + * hugetlb_cma_size_in_node[2] = 3*SIZE_1G + * hugetlb_cma_size_in_node[3] = 1*SIZE_1G + */ static int __init cmdline_parse_hugetlb_cma(char *p) { int nid, count = 0; @@ -7496,6 +7505,10 @@ static int __init cmdline_parse_hugetlb_cma(char *p) char *s = p; while (*s) { + /* IAMROOT20 20240607 + * %n은 읽은 문자 개수를 리턴하는 기능이다. + * https://woogyun.tistory.com/301 + */ if (sscanf(s, "%lu%n", &tmp, &count) != 1) break; @@ -7550,6 +7563,9 @@ void __init hugetlb_cma_reserve(int order) continue; } + /* IAMROOT20 20240607 + * order가 18이면 CMA 최소사이즈는 SIZE_1G + */ if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) { pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n", nid, (PAGE_SIZE << order) / SZ_1M); @@ -7571,6 +7587,9 @@ void __init hugetlb_cma_reserve(int order) return; } + /* IAMROOT20 20240607 + * node별 CMA 할당을 할 필요가 없을때. + */ if (!node_specific_cma_alloc) { /* * If 3 GB area is requested on a machine with 4 numa nodes, diff --git a/mm/memory.c b/mm/memory.c index 5ce82a76201d5..6143da0ec8934 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -126,6 +126,10 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL * and ZONE_HIGHMEM. 
*/ +/* IAMROOT20 20240607 + * high_memory = __va(memblock_end_of_DRAM() - 1) + 1; + * arm64_memblock_init에서 설정 + */ void *high_memory; EXPORT_SYMBOL(high_memory); From 613a50d7afc58cd6a024e41334d3c36f301da83b Mon Sep 17 00:00:00 2001 From: gychoi Date: Sat, 15 Jun 2024 13:18:59 +0000 Subject: [PATCH 065/104] IAMROOT20 20240615 - Add comments to arch_numa.c, address.c, of_numa.c --- drivers/base/arch_numa.c | 1 + drivers/of/address.c | 58 ++++++++++++++++++++++++++++++++++++++-- drivers/of/of_numa.c | 11 +++++++- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 8399d25276176..f482e770e95e7 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -439,6 +439,7 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) goto out_free_distance; + /* IAMROOT20_END 20240615 */ if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); ret = -EINVAL; diff --git a/drivers/of/address.c b/drivers/of/address.c index 12dc2f49194e0..7ca84f0105948 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -66,7 +66,25 @@ static u64 of_bus_default_map(__be32 *addr, const __be32 *range, int na, int ns, int pna) { u64 cp, s, da; - + /* IAMROOT20 20240615 + * ex) ethernet@0,0 { + * compatible = "smc,smc91c111" + * reg = <0 0 0x1000>; + * }; + * + * ex) range = <0 0 0x10100000 0x10000>; + * + * range : 자식 디바이스가 가질 수 있는 버스 주소의 범위 + * reg : 자식 디바이스의 주소 + * + * cp = 0x0000_0000_0000_0000; + * s = 0x0000_0000_0001_0000; + * addr = reg; + * da = 0x0000_0000_0000_0000; + * + * 자식 디바이스의 버스 범위의 시작 주소와, + * 자식 디바이스의 주소의 차를 반환 + */ cp = of_read_number(range, na); s = of_read_number(range + na + pna, ns); da = of_read_number(addr, na); @@ -498,6 +516,17 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, pr_debug("walking ranges...\n"); /* Now walk through the ranges */ + /* IAMROOT20 20240615 + * ranges = <자식주소 부모주소 자식주소 크기>; + * + * ranges = <0 0 
0x10100000 0x10000 + * 1 0 0x10160000 0x10000 + * 2 0 0x30000000 0x1000000>; + * + * rlen = 12 * 4; + * rlen /= 4 = 12; + * rone = 4 = ranges 필드 하나 + */ rlen /= 4; rone = na + pna + ns; for (; rlen >= rone; rlen -= rone, ranges += rone) { @@ -509,6 +538,9 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, pr_debug("not found !\n"); return 1; } + /* IAMROOT20 20240615 + * addr에는 ranges의 OF_BAD_ADDR가 아닌 필드의 부모 주소가 복사된다. + */ memcpy(addr, ranges + na, 4 * pna); finish: @@ -516,6 +548,9 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, pr_debug("with offset: %llx\n", offset); /* Translate it into parent bus space */ + /* IAMROOT20 20240615 + * addr에 offset을 더해서 반환. + */ return pbus->translate(addr, offset, pna); } @@ -555,7 +590,7 @@ static u64 __of_translate_address(struct device_node *dev, if (parent == NULL) goto bail; /* IAMROOT20 20240608 - * bus에 NULL check 필요? + * of_match_bus 안에 BUG() 함수가 있으므로 NULL이 반환되지 않음. */ bus = of_match_bus(parent); @@ -615,6 +650,18 @@ static u64 __of_translate_address(struct device_node *dev, pbus->name, pna, pns, parent); /* Apply bus translation */ + /* IAMROOT20 20240615 + * dev : 함수를 호출한 디바이스의 부모 디바이스 + * bus : 부모 디바이스의 버스 + * pbus : 조부모 디바이스의 버스 + * addr : + * - 함수를 호출한 디바이스의 reg 주소 + * - 이전 루프의 부모 주소 + offset + * na : 부모 디바이스의 address cells 크기 + * ns : 부모 디바이스의 size cells 크기 + * pna : 조부모 디바이스의 address cells 크기 + * rprop : ranges의 property + */ if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop)) break; @@ -638,6 +685,7 @@ u64 of_translate_address(struct device_node *dev, const __be32 *in_addr) u64 ret; /* IAMROOT20_END 20240608 */ + /* IAMROOT20_START 20240615 */ ret = __of_translate_address(dev, of_get_parent, in_addr, "ranges", &host); if (host) { @@ -1163,6 +1211,12 @@ static int __of_address_to_resource(struct device_node *dev, int index, int bar_ if (of_mmio_is_nonposted(dev)) flags |= IORESOURCE_MEM_NONPOSTED; + /* IAMROOT20 20240615 + * r->start = 매핑된 
디바이스의 시작 주소 + * r->end = 매핑된 디바이스의 끝 주소 + * r->flags = 디바이스의 flags + * r->name = 디바이스의 이름 + */ r->start = taddr; r->end = taddr + size - 1; r->flags = flags; diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c index ad512bde6e6db..8e3db643061a1 100644 --- a/drivers/of/of_numa.c +++ b/drivers/of/of_numa.c @@ -87,7 +87,11 @@ static int __init of_numa_parse_memory_nodes(void) } /* IAMROOT20_END 20240525 */ - // TODO: + /* IAMROOT20 20240615 + * 매핑된 디바이스의 주소와 정보를 resource 구조체에 넣고, + * 기존에 있던 memblock 영역에서, start에서 end영역의 + * node id를 nid로 설정한다. + */ for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1); @@ -229,6 +233,11 @@ int __init of_numa_init(void) of_numa_parse_cpu_nodes(); /* IAMROOT20_START 20240608 */ + /* IAMROOT20 20240615 + * memory 영역의 디바이스 노드를 파싱하여 + * 해당 영역의 memblock에 nid를 설정하고, + * numa_distance 배열을 distance_map의 내용으로 초기화한다. + */ r = of_numa_parse_memory_nodes(); if (r) return r; From 160e9dd784fc0418d50d6a09851221877faca752 Mon Sep 17 00:00:00 2001 From: fehead Date: Sun, 16 Jun 2024 11:31:27 +0900 Subject: [PATCH 066/104] =?UTF-8?q?IAMROOT20=20of=5Fnuma=5Finit=20?= =?UTF-8?q?=EC=A3=BC=EC=84=9D=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- drivers/base/arch_numa.c | 4 ++++ drivers/of/address.c | 10 ++++++++++ drivers/of/base.c | 9 +++++++++ 3 files changed, 23 insertions(+) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 8399d25276176..e42ef287e1410 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -23,6 +23,10 @@ EXPORT_SYMBOL(node_data); * numa_init * nodes_clear(numa_nodes_parsed) */ +/* IAMROOT20 20240615 + * numa_add_memblk + * node_set(nid, numa_nodes_parsed); + */ nodemask_t numa_nodes_parsed __initdata; static int cpu_to_node_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE }; diff --git a/drivers/of/address.c b/drivers/of/address.c index 12dc2f49194e0..b1b42123b49ad 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -501,6 +501,11 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, rlen /= 4; rone = na + pna + ns; for (; rlen >= rone; rlen -= rone, ranges += rone) { + /* IAMROOT20 20240615 + * of_busses[] = { ... + * .map = of_bus_default_map, + * ... }; + */ offset = bus->map(addr, ranges, na, ns, pna); if (offset != OF_BAD_ADDR) break; @@ -515,6 +520,11 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, of_dump_addr("parent translation for:", addr, pna); pr_debug("with offset: %llx\n", offset); + /* IAMROOT20 20240615 + * of_busses[] = { ... + * .translate = of_bus_default_translate, + * ... }; + */ /* Translate it into parent bus space */ return pbus->translate(addr, offset, pna); } diff --git a/drivers/of/base.c b/drivers/of/base.c index 65a4ea3027987..ae2c3763986af 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -322,6 +322,11 @@ EXPORT_SYMBOL(of_get_property); * 10. type * 11. name */ +/* IAMROOT20 20240615 + * of_numa_parse_distance_map(void) + * of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); + * __of_device_is_compatible(np, "numa-distance-map-v1", NULL, NULL) + */ static int __of_device_is_compatible(const struct device_node *device, const char *compat, const char *type, const char *name) { @@ -964,6 +969,10 @@ EXPORT_SYMBOL(of_find_node_by_type); * Return: A node pointer with refcount incremented, use * of_node_put() on it when done. 
*/ +/* IAMROOT20 20240615 + * of_numa_parse_distance_map(void) + * of_find_compatible_node(NULL, NULL, "numa-distance-map-v1"); + */ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { From 8418d1b51c2532fb532c731bc7f006c2cbedb822 Mon Sep 17 00:00:00 2001 From: SoominCho Date: Sat, 22 Jun 2024 19:22:17 +0900 Subject: [PATCH 067/104] IAMROOT20 20240622 add comments to arch_numa.c, kmemleak.c --- drivers/base/arch_numa.c | 5 ++++- mm/kmemleak.c | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 2b8b2aca05990..50441d0f4f740 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -296,7 +296,9 @@ void __init numa_free_distance(void) if (!numa_distance) return; - + /* IAMROOT20 20240622 + * size = 16 x 16 x 1 = 256 + */ size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); @@ -444,6 +446,7 @@ static int __init numa_init(int (*init_func)(void)) goto out_free_distance; /* IAMROOT20_END 20240615 */ + /* IAMROOT20_START 20240622 */ if (nodes_empty(numa_nodes_parsed)) { pr_info("No NUMA configuration found\n"); ret = -EINVAL; diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a2d34226e3c8c..ed4858230be44 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -408,7 +408,11 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, while (rb) { struct kmemleak_object *object; unsigned long untagged_objp; - + + /* IAMROOT20 20240622 + * rb의 주소에 kmemleak_object구조체에서 rb_node위치의 + * offset을 빼서 object의 주소를 구함. + */ object = rb_entry(rb, struct kmemleak_object, rb_node); untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); @@ -794,6 +798,10 @@ static void delete_object_part(unsigned long ptr, size_t size, bool is_phys) * split. Note that partial freeing is only done by free_bootmem() and * this happens before kmemleak_init() is called. 
*/ + /* IAMROOT20 20240622 + * object보다 제거할 메모리 영역이 작을 경우, + * 앞과 뒤의 object를 각각 생성하고 메모리 영역 삭제 + */ start = object->pointer; end = object->pointer + object->size; if (ptr > start) From 306d833d6416cd70e5bf83f101e166546bd39a72 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 22 Jun 2024 13:04:37 +0000 Subject: [PATCH 068/104] IAMROOT20 20240622 - add comments to arch_numa.c, nodemask.h --- drivers/base/arch_numa.c | 8 +++++++- include/linux/nodemask.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 50441d0f4f740..07d2e7f9c15ff 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -277,7 +277,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); if (tnid != nid) pr_info("NODE_DATA(%d) on node %d\n", nid, tnid); - + /* IAMROOT20 20240622 + * NODE_DATA(nid) : pglist_data 배열에서 nid 인덱스에 해당하는 배열의 주소 반환 + */ node_data[nid] = nd; memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); NODE_DATA(nid)->node_id = nid; @@ -413,6 +415,9 @@ static int __init numa_register_nodes(void) unsigned long start_pfn, end_pfn; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + /* IAMROOT20 20240622 + * nid에 해당하는 pglist_data의 필드를 설정 + */ setup_node_data(nid, start_pfn, end_pfn); node_set_online(nid); } @@ -457,6 +462,7 @@ static int __init numa_init(int (*init_func)(void)) if (ret < 0) goto out_free_distance; + /* IAMROOT20_END 20240622 */ setup_node_to_cpumask_map(); return 0; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index f35150cf0565e..a4d1b0e160051 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -467,6 +467,9 @@ extern unsigned int nr_online_nodes; static inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); + /* IAMROOT20 20240622 + * node_states[N_ONLINE] 비트 필드에 1로 설정된 비트의 개수를 반환 + */ nr_online_nodes = num_node_state(N_ONLINE); } From 
f5d84a8da9857b943e66c7fbaa784478ddb87b39 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 29 Jun 2024 13:05:31 +0000 Subject: [PATCH 069/104] IAMROOT20 20240629 - Add comments to arch_numa.c, __fls.h, bitmap.c, find.h --- drivers/base/arch_numa.c | 1 + include/asm-generic/bitops/__fls.h | 18 ++++++++++++++++++ include/linux/bitmap.h | 10 +++++++++- include/linux/find.h | 1 + 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 07d2e7f9c15ff..dbb6c3b53e936 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -463,6 +463,7 @@ static int __init numa_init(int (*init_func)(void)) goto out_free_distance; /* IAMROOT20_END 20240622 */ + /* IAMROOT20_START 20240629 */ setup_node_to_cpumask_map(); return 0; diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index 03f721a8a2b19..c5e2ed1563161 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -14,6 +14,24 @@ static __always_inline unsigned long __fls(unsigned long word) { int num = BITS_PER_LONG - 1; +/* IAMROOT20 20240629 + * 값이 있는 비트가 나올 때까지 절반씩 범위를 줄여가며 찾는다. 
+ * word = 0x0000_0000_0000_0003 + * - (word & (0xffff_ffff_0000_0000)), num = 31, word = 0x0000_0003_0000_0000 + * - (word & (0xffff_0000_0000_0000)), num = 15, word = 0x0003_0000_0000_0000 + * - (word & (0xff00_0000_0000_0000)), num = 7, word = 0x0300_0000_0000_0000 + * - (word & (0xf000_0000_0000_0000)), num = 3, word = 0x3000_0000_0000_0000 + * - (word & (0xC000_0000_0000_0000)), num = 1, word = 0xC000_0000_0000_0000 + * - (word & (0x8000_0000_0000_0000)), num = 1 + * + * word = 0x0000_0000_0000_0005 + * - (word & (0xffff_ffff_0000_0000)), num = 31, word = 0x0000_0005_0000_0000 + * - (word & (0xffff_0000_0000_0000)), num = 15, word = 0x0005_0000_0000_0000 + * - (word & (0xff00_0000_0000_0000)), num = 7, word = 0x0500_0000_0000_0000 + * - (word & (0xf000_0000_0000_0000)), num = 3, word = 0x5000_0000_0000_0000 + * - (word & (0xC000_0000_0000_0000)), num = 3, word = 0x5000_0000_0000_0000 + * - (word & (0x8000_0000_0000_0000)), num = 2 + */ #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { num -= 32; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5c077841a27af..32799f1e1cc30 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -255,12 +255,20 @@ extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, * = ~0UL >> 48 * = 0x0000_0000_0000_ffff * + * exam) nbits = 64 + * ~0UL >> (-64 & 63) + * = ~0UL >> (0xffff_ffff_ffff_ffC0 & 0x3f) + * = ~0UL >> 0x0 + * = 0x_ffff_ffff_ffff_ffff + * * exam) nbits = 1 --> 0x0000_0000_0000_0001 * exam) nbits = 2 --> 0x0000_0000_0000_0003 * exam) nbits = 3 --> 0x0000_0000_0000_0007 * exam) nbits = 17 --> 0x0000_0000_0001_ffff * exam) nbits = 30 --> 0x0000_0000_3fff_ffff - * exam) nbits = 32 --> 0x0000_0000_ffff_ffff + * exam) nbits = 31 --> 0x0000_0000_7fff_ffff + * exam) nbits = 64 --> 0xffff_ffff_ffff_ffff + * exam) nbits = 65 --> 0x0000_0000_0000_0001 */ #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) diff --git a/include/linux/find.h 
b/include/linux/find.h index 5e4f39ef2e72c..88101497207ef 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -384,6 +384,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) return val ? __fls(val) : size; } + /* IAMROOT20_END 20240629 */ return _find_last_bit(addr, size); } #endif From 48429b1c9380cd86225ba98dd92a641280fdac28 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 6 Jul 2024 13:15:05 +0000 Subject: [PATCH 070/104] IAMROOT20 20240706 - Add comments --- arch/arm64/include/asm/barrier.h | 4 ++++ arch/arm64/include/asm/pgtable-hwdef.h | 4 ++++ arch/arm64/mm/hugetlbpage.c | 4 ++++ drivers/base/arch_numa.c | 9 ++++++++- include/asm-generic/bitops/generic-non-atomic.h | 7 +++++++ include/linux/find.h | 1 - mm/hugetlb.c | 8 ++++++-- mm/memblock.c | 5 ++++- mm/mm_init.c | 1 + 9 files changed, 38 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index cf2987464c186..669d1d9b6a2f6 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -80,6 +80,10 @@ * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. 
*/ +/* IAMROOT20 20240706 + * idx가 sz보다 크거나 같으면 0, 작으면 ULONG_MAX 반환 + * sbc : Carry 플래그를 사용하여 빼기 연산 수행 + */ #define array_index_mask_nospec array_index_mask_nospec static inline unsigned long array_index_mask_nospec(unsigned long idx, unsigned long sz) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fc541758192ab..c6404160f5262 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -129,6 +129,10 @@ * CONT_PMD_SIZE = 16 * SZ_2M = 32MB * CONT_PMD_MASK = ~(32M - 1) */ +/* IAMROOT20 20240706 + * exam) 16K, 4-level + * CONT_PMD_SHIFT = 5 + 25 + */ #define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index cef435fdd090e..039710f5971e3 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -46,6 +46,10 @@ void __init arm64_hugetlb_cma_reserve(void) */ order = PUD_SHIFT - PAGE_SHIFT; else + /* IAMROOT20 20240706 + * ex) 16K + * 30 - 14 = 16 + */ order = CONT_PMD_SHIFT - PAGE_SHIFT; /* diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index dbb6c3b53e936..17cc3268605d8 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -463,7 +463,9 @@ static int __init numa_init(int (*init_func)(void)) goto out_free_distance; /* IAMROOT20_END 20240622 */ - /* IAMROOT20_START 20240629 */ + /* IAMROOT20_START 20240629 + * 각 노드에 대한 비트맵 형태의 CPU 마스크를 할당하고 초기화 + */ setup_node_to_cpumask_map(); return 0; @@ -483,6 +485,11 @@ static int __init numa_init(int (*init_func)(void)) * * Return: 0 on success, -errno on failure. 
*/ +/* IAMROOT20 20240706 + * NUMA를 설정한 시스템이 아닌 경우, 더미 데이터를 위해 + * memblock memory 영역의 region 전체의 nid를 0으로 설정 후 + * numa_nodes_parsed의 0번째 비트를 1로 설정 + */ static int __init dummy_numa_init(void) { phys_addr_t start = memblock_start_of_DRAM(); diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index 564a8c675d858..29dbf02cb0fe4 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -125,6 +125,13 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) * so `volatile` must always stay here with no cast-aways. See * `Documentation/atomic_bitops.txt` for the details. */ + /* IAMROOT20 20240706 + * exam) nr = 3 + * 0x0000_0000_0000_0001 & (addr >> (0x0000_0000_0000_0003 & 0x0000_0000_0000_003f)) + * 0x0000_0000_0000_0001 & (addr >> 3) + * addr의 2번째 인덱스까지 모두 삭제 + * addr의 3번째 인덱스의 비트가 1로 되어있는지 확인 + */ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } diff --git a/include/linux/find.h b/include/linux/find.h index 88101497207ef..5e4f39ef2e72c 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -384,7 +384,6 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) return val ? 
__fls(val) : size; } - /* IAMROOT20_END 20240629 */ return _find_last_bit(addr, size); } #endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2fd65ff8c12e3..d57f0af14065d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7490,13 +7490,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) static bool cma_reserve_called __initdata; /* IAMROOT20 20240607 - * cmdline에 hugetlb_cma=7G,0:1G,1:2G,2:1G,3:1G 일때 + * cmdline에 hugetlb_cma=0:1G,1:2G,2:3G,3:1G 일때 * - * hugetlb_cma_size = 7*SIZE_1G * hugetlb_cma_size_in_node[0] = 1*SIZE_1G * hugetlb_cma_size_in_node[1] = 2*SIZE_1G * hugetlb_cma_size_in_node[2] = 3*SIZE_1G * hugetlb_cma_size_in_node[3] = 1*SIZE_1G + * + * cmdline에 hugetlb_cma=7G 일때 + * + * hugetlb_cma_size = 7*SIZE_1G */ static int __init cmdline_parse_hugetlb_cma(char *p) { @@ -7590,6 +7593,7 @@ void __init hugetlb_cma_reserve(int order) /* IAMROOT20 20240607 * node별 CMA 할당을 할 필요가 없을때. */ + /* IAMROOT20_END 20240706 */ if (!node_specific_cma_alloc) { /* * If 3 GB area is requested on a machine with 4 numa nodes, diff --git a/mm/memblock.c b/mm/memblock.c index 5a2b5704a0068..c4ace8c8410ea 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1459,7 +1459,10 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, #ifdef CONFIG_NUMA int start_rgn, end_rgn; int i, ret; - + /* IAMROOT20 20240706 + * base ~ base + size 영역에 해당하는 memblock type의 + * 시작 region 인덱스와 끝 region 인덱스를 가져온다. 
+ */ ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; diff --git a/mm/mm_init.c b/mm/mm_init.c index 7f7f9c6778546..b65586e386e89 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1740,6 +1740,7 @@ void __init setup_nr_node_ids(void) { unsigned int highest; + /* IAMROOT20_END 20240629 */ /* IAMROOT20_START 20240706 */ highest = find_last_bit(node_possible_map.bits, MAX_NUMNODES); nr_node_ids = highest + 1; } From eeb026d75ed8caa2bab03990d273f494ccca0e53 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 13 Jul 2024 13:15:00 +0000 Subject: [PATCH 071/104] IAMROOT20 20240713 - Add comments --- mm/cma.c | 1 + mm/hugetlb.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index ab711fbe4b4f4..05c5a14b3facf 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -325,6 +325,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, goto err; } + /* IAMROOT20_END 20240713 */ /* Reserve memory */ if (fixed) { if (memblock_is_region_reserved(base, size) || diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d57f0af14065d..1de975c38c0ac 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7593,7 +7593,7 @@ void __init hugetlb_cma_reserve(int order) /* IAMROOT20 20240607 * node별 CMA 할당을 할 필요가 없을때. 
*/ - /* IAMROOT20_END 20240706 */ + /* IAMROOT20_END 20240706 */ /* IAMROOT20_START 20240713 */ if (!node_specific_cma_alloc) { /* * If 3 GB area is requested on a machine with 4 numa nodes, From 9a9d4930dce0a0a4ff336cf5f46105f3d6f384a2 Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Sat, 20 Jul 2024 15:10:50 +0900 Subject: [PATCH 072/104] add README.md --- README.md | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000000000..66273ece0c363 --- /dev/null +++ b/README.md @@ -0,0 +1,197 @@ +# ARM 64 리눅스 커널 6.4.2 분석 + +## 커뮤니티: IAMROOT 20차 +- [www.iamroot.org][#iamroot] | IAMROOT 홈페이지 +- [jake.dothome.co.kr][#moonc] | 문c 블로그 + +[#iamroot]: http://www.iamroot.org +[#moonc]: http://jake.dothome.co.kr + +## History + +- 첫 모임: 2015년 4월 25일 (240명으로 시작) + +### 1 주차 +- 2023.05.06 +- 리눅스 커널 내부 구조 1 ~2장 + +### 2 주차 +- 2023.05.06 +- 리눅스 커널 내부 구조 1 ~2장 + +### 3 주차 +- 2023.05.13 40명 +- 리눅스 커널 내부 구조 3 ~ 4장 + +### 4 주차 +- 2023.05.20 25명 +- 리눅스 커널 내부 구조 4 ~ 5장 + +### 5 주차 +- 2023.05.27 +- 부처님 오신날 + +### 6 주차 +- 2023.06.10 15명 +- 리눅스 커널 내부 구조 8장 + +### 7 주차 +- 2023.06.17 20명 +- ARM Architecture + +### 8 주차 +- 2023.06.24 22명 +- ARM Architecture + +### 9 주차 +- 2023.07.01 22명 +- ARM Architecture + +### 10 주차 +- 2023.07.08 22명 + +### 11 주차 +- 2023.07.15 22명 + +### 12 주차 +- 2023.07.22 18명 + +### 13 주차 +- 2023.07.29 19명 + +### 14 주차 +- 2023.08.05 17명 + +### 15 주차 +- 2023.08.12 12명 + +### 16 주차 +- 2023.08.19 12명 + +### 17 주차 +- 2023.08.26 10명 +- Arm 아키텍처의 구조와 원리 + +### 18 주차 +- 2023.09.02 10명 +- Arm 아키텍처의 구조와 원리 + +### 19 주차 +- 2023.09.09 11명 +- Arm 아키텍처의 구조와 원리 + +### 20 주차 +- 2023.09.16 9명 +- Arm 아키텍처의 구조와 원리 + +### 21 주차 +- 2023.09.23 12명 +- Arm 아키텍처의 구조와 원리 + +### 22 주차 +- 2023.09.30 +- 추석 + +### 23 주차 +- 2023.10.07 12명 +- Arm 아키텍처의 구조와 원리 + +### 24 주차 +- 2023.10.14 10명 +- Arm 아키텍처의 구조와 원리 + +### 25 주차 +- 2023.10.21 10명 + +### 26 주차 +- 2023.10.28 
11명 + +### 27 주차 +- 2023.11.04 9명 + +### 28 주차 +- 2023.11.11 10명 + +### 29 주차 +- 2023.11.18 8명 + +### 30 주차 +- 2023.11.25 8명 + +### 31 주차 +- 2023.12.02 6명 + +### 32 주차 +- 2023.12.09 4명 + +### 33 주차 +- 2023.12.16 3명 + +### 34 주차 +- 2023.12.23 4명 + +### 35 주차 +- 2023.12.30 +- 새해 연휴 + +### 36 주차 +- 2024.01.06 6명 + +### 37 주차 +- 2024.01.13 5명 + +### 38 주차 +- 2024.01.20 7명 + +### 39 주차 +- 2024.01.20 5명 + +### 40 주차 +- 2024.02.03 6명 + +### 41 주차 +- 2024.02.10 +- 설 연휴 + +### 42 주차 +- 2024.02.17 6명 + +### 43 주차 +- 2024.02.24 7명 + +### 44 주차 +- 2024.03.02 5명 + +### 45 주차 +- 2024.03.09 + +### 46 주차 +- 2024.03.16 + +### 47 주차 +- 2024.03.23 + +### 48 주차 +- 2024.03.30 + +### 49 주차 +- 2024.04.06 4명 + +### 48 주차 +- 2024.04.20 5명 + +### 49 주차 +- 2024.04.27 6명 + +### 50 주차 +- 2024.05.04 3명 + +### 51 주차 +- 2024.05.11 3명 + +### 52 주차 +- 2024.05.18 3명 + +### 53 주차 +- 2024.05.25 3명 + From 2c46c16b4f67a5ddf9bd096e5419cc7adf7c21ac Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Sat, 20 Jul 2024 17:35:49 +0900 Subject: [PATCH 073/104] =?UTF-8?q?64=20=EC=A3=BC=EC=B0=A8=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 66273ece0c363..329f49d084cf0 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,7 @@ [#moonc]: http://jake.dothome.co.kr ## History - -- 첫 모임: 2015년 4월 25일 (240명으로 시작) +- 첫 모임: 2023년 5월 06일 (약 47명으로 시작) ### 1 주차 - 2023.05.06 @@ -195,3 +194,16 @@ ### 53 주차 - 2024.05.25 3명 +### 61 주차 +- 2024.07.06 2명(임채훈, 최경건) + +### 62 주차 +- 2024.07.06 4명(이대로, 임채훈, 조수민, 최경건) + +### 63 주차 +- 2024.07.13 2명(임채훈, 최경건) +- arm64_hugetlb_cma_reserve + +### 64 주차 +- 2024.07.20 2명(이대로, 임채훈) +- arm64_hugetlb_cma_reserve From 124a54e58f60a916677030ceb497b24368400a6d Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 20 Jul 2024 17:45:44 +0900 Subject: [PATCH 074/104] IAMROOT20 20240720 - Add comments 
Signed-off-by: Daero Lee --- README.md | 2 ++ arch/arm64/kvm/pkvm.c | 3 --- arch/arm64/mm/init.c | 3 +++ include/linux/mmzone.h | 20 ++++++++++++++++++++ kernel/dma/contiguous.c | 9 +++++++++ mm/cma.c | 22 +++++++++++++++++++++- mm/hugetlb.c | 4 ++++ mm/sparse.c | 40 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 99 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 329f49d084cf0..5e65c46d77bb2 100644 --- a/README.md +++ b/README.md @@ -207,3 +207,5 @@ ### 64 주차 - 2024.07.20 2명(이대로, 임채훈) - arm64_hugetlb_cma_reserve +- dma_pernuma_cma_reserve +- sparse_init ~ diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 0c356e7e83842..6e9ece1ebbe72 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -56,9 +56,6 @@ static int __init register_memblock_regions(void) return 0; } -/* IAMROOT20 20240607 - * TODO: - */ void __init kvm_hyp_reserve(void) { u64 hyp_mem_pages = 0; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index c184d458419d6..9eb3b6cf55cc9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -493,6 +493,9 @@ void __init bootmem_init(void) dma_pernuma_cma_reserve(); + /* IAMROOT20 20240720 + * kvm은 분석하지 않음 + */ kvm_hyp_reserve(); /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a4bf4d56b5f61..e7a24f72640bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1728,6 +1728,11 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) +/* IAMROOT20 20240720 + * ex) 4K인 경우, + * PAGES_PER_SECTION (1 << PFN_SECTION_SHIFT) = (1 << 15) + * PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) = 0xFFFF_FFFF_FFFF_8000 + */ #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) @@ -1814,6 +1819,18 @@ struct mem_section { #define SECTIONS_PER_ROOT 1 #endif +/* IAMROOT20 20240720 + * ex) 4K, PA=48 인 경우 + * NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) + * = 
2^21 / 2^8 = 2^13(8192) + * + * NR_MEM_SECTIONS (1 << SECTIONS_SHIFT) = (1 << 21) + * SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) = 21 + * MAX_PHYSMEM_BITS 48 + * SECTION_SIZE_BITS 27 + * + * SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) = 4K / 16 = 256(2^8) + */ #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) @@ -1829,6 +1846,9 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) return ms->usage->pageblock_flags; } +/* IAMROOT20 20240720 + * __nr_to_section() : section number를 mem_section 구조체 주소로 변환 + */ static inline struct mem_section *__nr_to_section(unsigned long nr) { unsigned long root = SECTION_NR_TO_ROOT(nr); diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 6ea80ae426228..8e4fd0f220338 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -128,6 +128,12 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif #ifdef CONFIG_DMA_PERNUMA_CMA +/* IAMROOT20 20240720 + * kernel parameter 설정 : cma_pernuma=nn[MG] + * - enable 되어 있으면 + * DMA user가 버퍼를 할당할 때, pernuma area에서 메모리를 먼저 찾고, + * 실패한 경우 global default memory 에서 메모리를 할당함 + */ void __init dma_pernuma_cma_reserve(void) { int nid; @@ -141,6 +147,9 @@ void __init dma_pernuma_cma_reserve(void) struct cma **cma = &dma_contiguous_pernuma_area[nid]; snprintf(name, sizeof(name), "pernuma%d", nid); + /* IAMROOT20 20240720 + * pernuma_size_bytes 만큼 memblock에서 메모리 할당(reserved로 표시) + */ ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, 0, false, name, cma, nid); if (ret) { diff --git a/mm/cma.c b/mm/cma.c index 05c5a14b3facf..e7ff8ef4cead5 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -325,8 +325,15 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, goto err; } - /* IAMROOT20_END 20240713 */ + /* IAMROOT20_END 20240713 */ /* IAMROOT20_START 20240720 */ /* Reserve 
memory */ + /* IAMROOT20 20240720 + * 1) fixed == true + * - base가 지정되어 있는 경우 + * 2) fixed == false + * - base가 지정되어 있지 않은 경우, + * memblock에서 할당가능한 영역(size)을 찾아서 reserved로 설정한다 + */ if (fixed) { if (memblock_is_region_reserved(base, size) || memblock_reserve(base, size) < 0) { @@ -344,6 +351,11 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, * Avoid using first 4GB to not interfere with constrained zones * like DMA/DMA32. */ + /* IAMROOT20 20240720 + * 메모리가 충분한 경우, cma를 bottom-up으로 할당 + * - 메모리 compaction하는 경우, cma 할당 fail을 막을 수 있음: + * - DMA32 영역의 간섭을 피하기 위해 start는 4G 이후로 설정 + */ #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) { memblock_set_bottom_up(true); @@ -359,12 +371,20 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, * try allocating from high memory first and fall back to low * memory in case of failure. */ + /* IAMROOT20 20240720 + * arm64의 경우 highmem을 사용하지 않기 때문에 아래 if문을 수행하지 않음 + * - highmem_start = memblock_end + */ if (!addr && base < highmem_start && limit > highmem_start) { addr = memblock_alloc_range_nid(size, alignment, highmem_start, limit, nid, true); limit = highmem_start; } + /* IAMROOT20 20240720 + * 위의 두 if문에서 할당하지 못한 경우, 실제로 여기서 memblock 할당 + * - base ~ limit에서 size 만큼의 메모리 영역을 할당 + */ if (!addr) { addr = memblock_alloc_range_nid(size, alignment, base, limit, nid, true); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1de975c38c0ac..228173124debf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7626,6 +7626,10 @@ void __init hugetlb_cma_reserve(int order) * may be returned to CMA allocator in the case of * huge page demotion. 
*/ + /* IAMROOT20 20240720 + * - size 만큼 memblock에서 contiguous memory 할당 + * - hugetlb_cma[nid]에 할당한 메모리 정보(struct cma)에 대한 포인터 저장 + */ res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << HUGETLB_PAGE_ORDER, 0, false, name, diff --git a/mm/sparse.c b/mm/sparse.c index c2afdb26039e5..151c1e67f8ff6 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -38,6 +38,9 @@ EXPORT_SYMBOL(mem_section); * node the page belongs to. */ #if MAX_NUMNODES <= 256 +/* IAMROOT20 20240720 + * section_to_node_table[] : section -> node 매핑 테이블 + */ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #else static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; @@ -81,6 +84,9 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) static int __meminit sparse_index_init(unsigned long section_nr, int nid) { + /* IAMROOT20 20240720 + * root는 *mem_section 배열에서의 index + */ unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; @@ -94,6 +100,10 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) if (mem_section[root]) return 0; + /* IAMROOT20 20240720 + * mem_section[root]가 초기화되어 있지 않으면, 메모리를 할당하고(section) + * mem_section[root]에 저장한다(mem_section[root] = section) + */ section = sparse_index_alloc(nid); if (!section) return -ENOMEM; @@ -115,6 +125,9 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) * node. This keeps us from having to use another data structure. The * node information is cleared just before we store the real mem_map. 
*/ +/* IAMROOT20 20240720 + * SECTION_NID_SHIFT 4 + */ static inline unsigned long sparse_encode_early_nid(int nid) { return ((unsigned long)nid << SECTION_NID_SHIFT); @@ -129,6 +142,12 @@ static inline int sparse_early_nid(struct mem_section *section) static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, unsigned long *end_pfn) { + /* IAMROOT20 20240720 + * MAX_PHYSMEM_BITS = 48 + * PAGE_SHIFT = 12 + * + * max_sparsemem_pfn = 1 << 36 + */ unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); /* @@ -226,11 +245,21 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) static void __init memory_present(int nid, unsigned long start, unsigned long end) { unsigned long pfn; + /* IAMROOT20 20240720 + * + */ #ifdef CONFIG_SPARSEMEM_EXTREME if (unlikely(!mem_section)) { unsigned long size, align; + /* IAMROOT20 20240720 + * ex) 4K, PA=48인 경우, + * NR_SECTION_ROOTS = 2^13(8192) + * + * INTERNODE_CACHE_SHIFT = 6 + * align = 64 + */ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_alloc(size, align); @@ -240,8 +269,14 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en } #endif + /* IAMROOT20 20240720 + * PAGE_SECTION_MASK = 0xFFFF_FFFF_FFFF_8000 + */ start &= PAGE_SECTION_MASK; mminit_validate_memmodel_limits(&start, &end); + /* IAMROOT20 20240720 + * section 크기(PAGES_PER_SECTION, 128MB default)단위로 for문 수행 + */ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); struct mem_section *ms; @@ -250,6 +285,10 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en set_section_nid(section, nid); ms = __nr_to_section(section); + /* IAMROOT20 20240720 + * 부팅 초기에는 section_mem_map의 하위 몇 비트를 + * nid와 section status를 표시하는 데 사용 + */ if (!ms->section_mem_map) { ms->section_mem_map = sparse_encode_early_nid(nid) | SECTION_IS_ONLINE; @@ -569,6 
+608,7 @@ void __init sparse_init(void) /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ set_pageblock_order(); + /* IAMROOT20_END 20240720 */ for_each_present_section_nr(pnum_begin + 1, pnum_end) { int nid = sparse_early_nid(__nr_to_section(pnum_end)); From fa95c2fc6a7d3f5c6ed9775289fb07c01398b04f Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 20 Jul 2024 22:14:39 +0900 Subject: [PATCH 075/104] IAMROOT20 20240720 - modify README.md Signed-off-by: Daero Lee --- README.md | 281 +++++++++++++++++++++++++++--------------------------- 1 file changed, 141 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 5e65c46d77bb2..80f8495d11d3c 100644 --- a/README.md +++ b/README.md @@ -7,205 +7,206 @@ [#iamroot]: http://www.iamroot.org [#moonc]: http://jake.dothome.co.kr +[#이대로] : skseofh@gmail.com + ## History -- 첫 모임: 2023년 5월 06일 (약 47명으로 시작) -### 1 주차 -- 2023.05.06 -- 리눅스 커널 내부 구조 1 ~2장 +### 64 주차 +- 2024.07.20 2명(이대로, 임채훈) +- arm64_hugetlb_cma_reserve +- dma_pernuma_cma_reserve +- sparse_init ~ -### 2 주차 -- 2023.05.06 -- 리눅스 커널 내부 구조 1 ~2장 +### 63 주차 +- 2024.07.13 2명(임채훈, 최경건) +- arm64_hugetlb_cma_reserve -### 3 주차 -- 2023.05.13 40명 -- 리눅스 커널 내부 구조 3 ~ 4장 +### 62 주차 +- 2024.07.06 4명(이대로, 임채훈, 조수민, 최경건) -### 4 주차 -- 2023.05.20 25명 -- 리눅스 커널 내부 구조 4 ~ 5장 +### 61 주차 +- 2024.07.06 2명(임채훈, 최경건) -### 5 주차 -- 2023.05.27 -- 부처님 오신날 +### 53 주차 +- 2024.05.25 3명 -### 6 주차 -- 2023.06.10 15명 -- 리눅스 커널 내부 구조 8장 +### 52 주차 +- 2024.05.18 3명 -### 7 주차 -- 2023.06.17 20명 -- ARM Architecture +### 51 주차 +- 2024.05.11 3명 -### 8 주차 -- 2023.06.24 22명 -- ARM Architecture +### 50 주차 +- 2024.05.04 3명 -### 9 주차 -- 2023.07.01 22명 -- ARM Architecture +### 49 주차 +- 2024.04.27 6명 -### 10 주차 -- 2023.07.08 22명 +### 48 주차 +- 2024.04.20 5명 -### 11 주차 -- 2023.07.15 22명 +### 49 주차 +- 2024.04.06 4명 -### 12 주차 -- 2023.07.22 18명 +### 48 주차 +- 2024.03.30 -### 13 주차 -- 2023.07.29 19명 +### 47 주차 +- 2024.03.23 -### 14 주차 -- 2023.08.05 17명 +### 46 주차 +- 2024.03.16 -### 15 주차 -- 
2023.08.12 12명 +### 45 주차 +- 2024.03.09 -### 16 주차 -- 2023.08.19 12명 +### 44 주차 +- 2024.03.02 5명 -### 17 주차 -- 2023.08.26 10명 -- Arm 아키텍처의 구조와 원리 +### 43 주차 +- 2024.02.24 7명 -### 18 주차 -- 2023.09.02 10명 -- Arm 아키텍처의 구조와 원리 +### 42 주차 +- 2024.02.17 6명 -### 19 주차 -- 2023.09.09 11명 -- Arm 아키텍처의 구조와 원리 +### 41 주차 +- 2024.02.10 +- 설 연휴 -### 20 주차 -- 2023.09.16 9명 -- Arm 아키텍처의 구조와 원리 +### 40 주차 +- 2024.02.03 6명 -### 21 주차 -- 2023.09.23 12명 -- Arm 아키텍처의 구조와 원리 +### 39 주차 +- 2024.01.20 5명 -### 22 주차 -- 2023.09.30 -- 추석 +### 38 주차 +- 2024.01.20 7명 -### 23 주차 -- 2023.10.07 12명 -- Arm 아키텍처의 구조와 원리 +### 37 주차 +- 2024.01.13 5명 -### 24 주차 -- 2023.10.14 10명 -- Arm 아키텍처의 구조와 원리 +### 36 주차 +- 2024.01.06 6명 -### 25 주차 -- 2023.10.21 10명 +### 35 주차 +- 2023.12.30 +- 새해 연휴 -### 26 주차 -- 2023.10.28 11명 +### 34 주차 +- 2023.12.23 4명 -### 27 주차 -- 2023.11.04 9명 +### 33 주차 +- 2023.12.16 3명 -### 28 주차 -- 2023.11.11 10명 +### 32 주차 +- 2023.12.09 4명 -### 29 주차 -- 2023.11.18 8명 +### 31 주차 +- 2023.12.02 6명 ### 30 주차 - 2023.11.25 8명 -### 31 주차 -- 2023.12.02 6명 +### 29 주차 +- 2023.11.18 8명 -### 32 주차 -- 2023.12.09 4명 +### 28 주차 +- 2023.11.11 10명 -### 33 주차 -- 2023.12.16 3명 +### 27 주차 +- 2023.11.04 9명 -### 34 주차 -- 2023.12.23 4명 +### 26 주차 +- 2023.10.28 11명 -### 35 주차 -- 2023.12.30 -- 새해 연휴 +### 25 주차 +- 2023.10.21 10명 -### 36 주차 -- 2024.01.06 6명 +### 24 주차 +- 2023.10.14 10명 +- Arm 아키텍처의 구조와 원리 -### 37 주차 -- 2024.01.13 5명 +### 23 주차 +- 2023.10.07 12명 +- Arm 아키텍처의 구조와 원리 -### 38 주차 -- 2024.01.20 7명 +### 22 주차 +- 2023.09.30 +- 추석 -### 39 주차 -- 2024.01.20 5명 +### 21 주차 +- 2023.09.23 12명 +- Arm 아키텍처의 구조와 원리 -### 40 주차 -- 2024.02.03 6명 +### 20 주차 +- 2023.09.16 9명 +- Arm 아키텍처의 구조와 원리 -### 41 주차 -- 2024.02.10 -- 설 연휴 +### 19 주차 +- 2023.09.09 11명 +- Arm 아키텍처의 구조와 원리 -### 42 주차 -- 2024.02.17 6명 +### 18 주차 +- 2023.09.02 10명 +- Arm 아키텍처의 구조와 원리 -### 43 주차 -- 2024.02.24 7명 +### 17 주차 +- 2023.08.26 10명 +- Arm 아키텍처의 구조와 원리 -### 44 주차 -- 2024.03.02 5명 +### 16 주차 +- 2023.08.19 12명 -### 45 주차 -- 2024.03.09 +### 15 주차 
+- 2023.08.12 12명 -### 46 주차 -- 2024.03.16 +### 14 주차 +- 2023.08.05 17명 -### 47 주차 -- 2024.03.23 +### 13 주차 +- 2023.07.29 19명 -### 48 주차 -- 2024.03.30 +### 12 주차 +- 2023.07.22 18명 -### 49 주차 -- 2024.04.06 4명 +### 11 주차 +- 2023.07.15 22명 -### 48 주차 -- 2024.04.20 5명 +### 10 주차 +- 2023.07.08 22명 -### 49 주차 -- 2024.04.27 6명 +### 9 주차 +- 2023.07.01 22명 +- ARM Architecture -### 50 주차 -- 2024.05.04 3명 +### 8 주차 +- 2023.06.24 22명 +- ARM Architecture -### 51 주차 -- 2024.05.11 3명 +### 7 주차 +- 2023.06.17 20명 +- ARM Architecture -### 52 주차 -- 2024.05.18 3명 +### 6 주차 +- 2023.06.10 15명 +- 리눅스 커널 내부 구조 8장 -### 53 주차 -- 2024.05.25 3명 +### 5 주차 +- 2023.05.27 +- 부처님 오신날 -### 61 주차 -- 2024.07.06 2명(임채훈, 최경건) +### 4 주차 +- 2023.05.20 25명 +- 리눅스 커널 내부 구조 4 ~ 5장 -### 62 주차 -- 2024.07.06 4명(이대로, 임채훈, 조수민, 최경건) +### 3 주차 +- 2023.05.13 40명 +- 리눅스 커널 내부 구조 3 ~ 4장 -### 63 주차 -- 2024.07.13 2명(임채훈, 최경건) -- arm64_hugetlb_cma_reserve +### 2 주차 +- 2023.05.06 +- 리눅스 커널 내부 구조 1 ~2장 -### 64 주차 -- 2024.07.20 2명(이대로, 임채훈) -- arm64_hugetlb_cma_reserve -- dma_pernuma_cma_reserve -- sparse_init ~ +### 1 주차 +- 2023.05.06 (약 47명으로 시작) +- 리눅스 커널 내부 구조 1 ~2장 From 3a6e6903a2fe887079a6e8709cfa49a9f0bc44c7 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 20 Jul 2024 22:14:39 +0900 Subject: [PATCH 076/104] IAMROOT20 20240720 - modify README.md Signed-off-by: Daero Lee --- README.md | 94 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 80f8495d11d3c..148f3189290c0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ [#iamroot]: http://www.iamroot.org [#moonc]: http://jake.dothome.co.kr -[#이대로] : skseofh@gmail.com +## 스터디 멤버 +- 이대로 +- 임채훈 ## History @@ -17,76 +19,98 @@ - dma_pernuma_cma_reserve - sparse_init ~ -### 63 주차 +### 65 주차 - 2024.07.13 2명(임채훈, 최경건) - arm64_hugetlb_cma_reserve +### 64 주차 +- 2024.07.06 2명(임채훈, 최경건) + +### 63 주차 +- 2024.06.29 2명(임채훈, 최경건) + ### 62 주차 -- 2024.07.06 4명(이대로, 임채훈, 조수민, 최경건) +- 
2024.06.22 3명(임채훈, 조수민, 최경건) ### 61 주차 -- 2024.07.06 2명(임채훈, 최경건) +- 2024.06.15 4명(이대로, 임채훈, 조수민, 최경건) + +### 60 주차 +- 2024.06.08 3명(임채훈, 조수민, 최경건) + +### 59 주차 +- 2024.06.01 1명(임채훈) + +### 58 주차 +- 2024.05.25 4명(이대로, 임채훈, 조수민, 최경건) + +### 57 주차 +- 2024.05.18 3명(이대로, 임채훈, 조수민) + +### 56 주차 +- 2024.05.11 3명(이대로, 임채훈, 조수민) + +### 55 주차 +- 2024.05.04 +- 어린이날 연휴 + +### 54 주차 +- 2024.04.27 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 53 주차 -- 2024.05.25 3명 +- 2024.04.20 5명(이대로, 임채훈, 최경건, ??) ### 52 주차 -- 2024.05.18 3명 +- 2024.04.13 3명(이대로, 임채훈, 최경건) ### 51 주차 -- 2024.05.11 3명 +- 2024.04.06 4명(이대로, 임채훈, 최경건) ### 50 주차 -- 2024.05.04 3명 +- 2024.03.30 2명(박성수, 임채훈) ### 49 주차 -- 2024.04.27 6명 +- 2024.03.26 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 48 주차 -- 2024.04.20 5명 - -### 49 주차 -- 2024.04.06 4명 - -### 48 주차 -- 2024.03.30 +- 2024.03.23 2명(이대로, 임채훈) ### 47 주차 -- 2024.03.23 +- 2024.03.16 (K, ruffalo2881, 박성수, 이대로, 임채훈, 조수민) ### 46 주차 -- 2024.03.16 +- 2024.03.09 (박성수, 이대로, 임채훈, 조수민) ### 45 주차 -- 2024.03.09 +- 2024.03.02 5명(박성수, 이대로, 임채훈, 조수민) ### 44 주차 -- 2024.03.02 5명 +- 2024.02.24 7명(K, 김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 43 주차 -- 2024.02.24 7명 +- 2024.02.17 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 42 주차 -- 2024.02.17 6명 - -### 41 주차 - 2024.02.10 - 설 연휴 +### 41 주차 +- 2024.02.03 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) + ### 40 주차 -- 2024.02.03 6명 +- 2024.01.27 6명(김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 39 주차 -- 2024.01.20 5명 +- 2024.01.20 5명(박성수, 이대로, 임채훈, 조수민, 최경건) ### 38 주차 -- 2024.01.20 7명 +- 2024.01.20 7명(K, 박성수, 이대로, 임채훈, 최경건) ### 37 주차 -- 2024.01.13 5명 +- 2024.01.13 5명(K, 김해성, 박성수, 임채훈, 최경건) ### 36 주차 -- 2024.01.06 6명 +- 2024.01.06 6명(K, 박성수, 이대로, 임채훈, 최경건) ### 35 주차 - 2023.12.30 @@ -192,21 +216,21 @@ - 리눅스 커널 내부 구조 8장 ### 5 주차 +- 2023.06.03 +- 리눅스 커널 내부 구조 + +### 4 주차 - 2023.05.27 - 부처님 오신날 -### 4 주차 +### 3 주차 - 2023.05.20 25명 - 리눅스 커널 내부 구조 4 ~ 5장 -### 3 주차 +### 2 주차 - 2023.05.13 40명 - 리눅스 커널 내부 구조 3 ~ 4장 -### 2 주차 -- 2023.05.06 -- 리눅스 커널 내부 구조 1 ~2장 - ### 1 주차 - 2023.05.06 (약 47명으로 시작) - 리눅스 커널 내부 구조 1 
~2장 From 8a103d8b09af84ef71120cedc227a8a3e237e102 Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Sun, 21 Jul 2024 18:31:04 +0900 Subject: [PATCH 077/104] modify README.md.. --- README.md | 57 ++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 148f3189290c0..296ac5fb15d33 100644 --- a/README.md +++ b/README.md @@ -13,95 +13,92 @@ ## History -### 64 주차 +### 65 주차 - 2024.07.20 2명(이대로, 임채훈) - arm64_hugetlb_cma_reserve - dma_pernuma_cma_reserve - sparse_init ~ -### 65 주차 +### 64 주차 - 2024.07.13 2명(임채훈, 최경건) - arm64_hugetlb_cma_reserve -### 64 주차 +### 63 주차 - 2024.07.06 2명(임채훈, 최경건) -### 63 주차 +### 62 주차 - 2024.06.29 2명(임채훈, 최경건) -### 62 주차 +### 61 주차 - 2024.06.22 3명(임채훈, 조수민, 최경건) -### 61 주차 +### 60 주차 - 2024.06.15 4명(이대로, 임채훈, 조수민, 최경건) -### 60 주차 +### 59 주차 - 2024.06.08 3명(임채훈, 조수민, 최경건) -### 59 주차 +### 58 주차 - 2024.06.01 1명(임채훈) -### 58 주차 +### 57 주차 - 2024.05.25 4명(이대로, 임채훈, 조수민, 최경건) -### 57 주차 +### 56 주차 - 2024.05.18 3명(이대로, 임채훈, 조수민) -### 56 주차 +### 55 주차 - 2024.05.11 3명(이대로, 임채훈, 조수민) -### 55 주차 +### 54 주차 - 2024.05.04 - 어린이날 연휴 -### 54 주차 +### 53 주차 - 2024.04.27 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) -### 53 주차 +### 52 주차 - 2024.04.20 5명(이대로, 임채훈, 최경건, ??) 
-### 52 주차 +### 51 주차 - 2024.04.13 3명(이대로, 임채훈, 최경건) -### 51 주차 +### 50 주차 - 2024.04.06 4명(이대로, 임채훈, 최경건) -### 50 주차 +### 49 주차 - 2024.03.30 2명(박성수, 임채훈) -### 49 주차 +### 48 주차 - 2024.03.26 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) -### 48 주차 +### 47 주차 - 2024.03.23 2명(이대로, 임채훈) -### 47 주차 +### 46 주차 - 2024.03.16 (K, ruffalo2881, 박성수, 이대로, 임채훈, 조수민) -### 46 주차 +### 45 주차 - 2024.03.09 (박성수, 이대로, 임채훈, 조수민) -### 45 주차 +### 44 주차 - 2024.03.02 5명(박성수, 이대로, 임채훈, 조수민) -### 44 주차 +### 43 주차 - 2024.02.24 7명(K, 김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) -### 43 주차 +### 42 주차 - 2024.02.17 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) -### 42 주차 +### 41 주차 - 2024.02.10 - 설 연휴 -### 41 주차 -- 2024.02.03 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) - ### 40 주차 -- 2024.01.27 6명(김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) +- 2024.02.03 6명(K, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 39 주차 -- 2024.01.20 5명(박성수, 이대로, 임채훈, 조수민, 최경건) +- 2024.01.27 6명(김형우, 박성수, 이대로, 임채훈, 조수민, 최경건) ### 38 주차 - 2024.01.20 7명(K, 박성수, 이대로, 임채훈, 최경건) From 46bfa0b500e4ca1c790dc4ee60eeefbe428e285d Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 27 Jul 2024 21:11:04 +0900 Subject: [PATCH 078/104] IAMROOT20 20240727 - Add comments Signed-off-by: Daero Lee --- include/linux/mmzone.h | 17 +++++++++++++++++ mm/mm_init.c | 4 ++++ mm/sparse-vmemmap.c | 11 +++++++++++ mm/sparse.c | 43 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e7a24f72640bd..7a0ed8f5e93c1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1736,6 +1736,13 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) +/* IAMROOT20 20240727 + * PFN_SECTION_SHIFT = 27 - 12 = 15 + * pageblock_order = 9 + * NR_PAGEBLOCK_BITS = 4 + * + * SECTION_BLOCKFLAGS_BITS = (1 << (15 - 9)) * 4 = 256 + */ #define SECTION_BLOCKFLAGS_BITS \ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS) @@ 
-1758,6 +1765,11 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_SHIFT 21 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT) +/* IAMROOT20 20240727 + * PFN_SUBSECTION_SHIFT = 21 - 12 = 9 + * PAGES_PER_SUBSECTION = 1 << 9 = 512 + * PAGE_SUBSECTION_MASK = 0xFFFF_FFFF_FFFF_FE00 + */ #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) @@ -1765,6 +1777,11 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #if SUBSECTION_SHIFT > SECTION_SIZE_BITS #error Subsection size exceeds section size #else +/* IAMROOT20 20240727 + * SECTION_SIZE_BITS = 27 + * SUBSECTION_SHIFT = 21 + * SUBSECTIONS_PER_SECTION = 1 << 6 = 64 + */ #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) #endif diff --git a/mm/mm_init.c b/mm/mm_init.c index b65586e386e89..eeabcb294959b 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -593,6 +593,10 @@ static int __meminit __early_pfn_to_nid(unsigned long pfn, if (state->last_start <= pfn && pfn < state->last_end) return state->last_nid; + /* IAMROOT20 20240727 + * pfn에 해당하는 memblock.memory 영역을 찾아 nid를 return + * - start_pfn, end_pfn을 업데이트 + */ nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); if (nid != NUMA_NO_NODE) { state->last_start = start_pfn; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 10d73a0dfcec7..beed1ad1e0b91 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -316,6 +316,9 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, pud_t *pud; pmd_t *pmd; + /* IAMROOT20 20240727 + * PMD size(2M)만큼 증가시키면서 pgd, p4d, pud, pmd populate + */ for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -352,6 +355,10 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, } } else if (vmemmap_check_pmd(pmd, node, addr, next)) continue; + /* 
IAMROOT20 20240727 + * PMD size로 mapping을 실패한 경우, + * page size로 mapping을 시도한다 + */ if (vmemmap_populate_basepages(addr, next, node, altmap)) return -ENOMEM; } @@ -450,6 +457,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { + /* IAMROOT20 20240727 + * start : pfn이 가리키는 page 구조체 + * end : 마지막 page 구조체 + */ unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); int r; diff --git a/mm/sparse.c b/mm/sparse.c index 151c1e67f8ff6..db6c246e9b845 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -245,9 +245,6 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) static void __init memory_present(int nid, unsigned long start, unsigned long end) { unsigned long pfn; - /* IAMROOT20 20240720 - * - */ #ifdef CONFIG_SPARSEMEM_EXTREME if (unlikely(!mem_section)) { @@ -347,11 +344,23 @@ static void __meminit sparse_init_one_section(struct mem_section *ms, ms->usage = usage; } +/* IAMROOT20 20240727 + * SECTION_BLOCKFLAGS_BITS 256 + * BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) = 4 + * + * usemap_size = 4 * 8 = 32 bytes + */ static unsigned long usemap_size(void) { return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } +/* IAMROOT20 20240727 + * sizeof(struct mem_section_usage) = 8 + * usemap_size() = 32 + * + * -> mem_section_usage.pageblock_flags[4]가 되는 효과 + */ size_t mem_section_usage_size(void) { return sizeof(struct mem_section_usage) + usemap_size(); @@ -385,6 +394,13 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. 
*/ + /* IAMROOT20 20240727 + * PAGE_SECTION_MASK << PAGE_SHIFT = 0xFFFF_FFFF_F800_0000 + * + * pgdat과 usemap이 다른 section에 있으면 상호 의존성이 생겨 다른 섹션이 + * usemap을 참조하고 있는 동안 해당 섹션을 제거할 수 없게 함 + * 이를 방지하기 위해서 pgdat과 usemap을 동일한 섹션에 할당 + */ goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); @@ -454,6 +470,12 @@ static void __init check_usemap_section_nr(int nid, #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_SPARSEMEM_VMEMMAP +/* IAMROOT20 20240727 + * sizeof(struct page) = 64(default) + * PAGES_PER_SECTION = 1 << 15 + * + * -> 64 * (1<<15) = 0x20_0000(2M) + */ static unsigned long __init section_map_size(void) { return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); @@ -496,6 +518,9 @@ static inline void __meminit sparse_buffer_free(unsigned long size) static void __init sparse_buffer_init(unsigned long size, int nid) { + /* IAMROOT20 20240727 + * MAX_DMA_ADDRESS = PAGE_OFFSET(0xffff_0000_0000_0000) + */ phys_addr_t addr = __pa(MAX_DMA_ADDRESS); WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? 
*/ /* @@ -550,12 +575,22 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum; struct page *map; + /* IAMROOT20 20240727 + * usemap 메모리 할당 + */ usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), mem_section_usage_size() * map_count); if (!usage) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } + /* IAMROOT20 20240727 + * section_map_size() : 2M + * + * page 구조체 배열 메모리 할당 + * - map_count * section_map_size() 크기 만큼 memblock에서 메모리를 할당 + * - sparsemap_buf가 시작 주소를 가리킴 + */ sparse_buffer_init(map_count * section_map_size(), nid); for_each_present_section_nr(pnum_begin, pnum) { unsigned long pfn = section_nr_to_pfn(pnum); @@ -608,7 +643,7 @@ void __init sparse_init(void) /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ set_pageblock_order(); - /* IAMROOT20_END 20240720 */ + /* IAMROOT20_END 20240720 *//* IAMROOT20_START 20240727 */ for_each_present_section_nr(pnum_begin + 1, pnum_end) { int nid = sparse_early_nid(__nr_to_section(pnum_end)); From 61d7ef86256b09139558d5b36155743eca82657e Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 27 Jul 2024 13:06:22 +0000 Subject: [PATCH 079/104] IAMROOT20 20240727 - Add comments to mmzone.h, sparse.cc --- include/linux/mmzone.h | 5 +++++ mm/sparse.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7a0ed8f5e93c1..c973a16c88ea5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1726,6 +1726,11 @@ static inline bool movable_only_nodes(nodemask_t *nodes) #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) +/* IAMROOT20 20240727 + * SECTIONS_SHIFT = MAX_PHYSMEM_BITS - SECTION_SIZE_BITS + * = 48 - 27 + * 전체 PA에서 가질 수 있는 섹션의 개수 + */ #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) /* IAMROOT20 20240720 diff --git a/mm/sparse.c b/mm/sparse.c index db6c246e9b845..400b15e0bc689 100644 --- a/mm/sparse.c +++ 
b/mm/sparse.c @@ -428,6 +428,10 @@ static void __init check_usemap_section_nr(int nid, old_pgdat_snr = NR_MEM_SECTIONS; } + /* IAMROOT20 20240727 + * usage 물리 주소를 PAGE_SHIFT 만큼 옮기게 되면 pfn이 되고, + * 이 값을 가지고 section 넘버를 구한다 + */ usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) @@ -440,6 +444,7 @@ static void __init check_usemap_section_nr(int nid, old_usemap_snr = usemap_snr; old_pgdat_snr = pgdat_snr; + /* IAMROOT20_END 20240727 */ usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); if (usemap_nid != nid) { pr_info("node %d must be removed before remove section %ld\n", From 89cbf17481379148f9c28f51b4cc1457df3799ca Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 27 Jul 2024 13:06:41 +0000 Subject: [PATCH 080/104] IAMROOT20 20240727 - Update README.md --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 296ac5fb15d33..f7ddb5b6801e6 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,20 @@ ## 스터디 멤버 - 이대로 - 임채훈 +- 조수민 +- 최경건 ## History +### 66 주차 +- 2024.07.27 4명(이대로, 임채훈, 조수민, 최경건) +- sparse_init +- sparse_init_nid +- __populate_section_memmap + - vmemmap_populate + - vmemmap_populate_hugepages +- check_usemap_section_nr ~ + ### 65 주차 - 2024.07.20 2명(이대로, 임채훈) - arm64_hugetlb_cma_reserve From 7e25e3edeefa0b7c865e123d042e981c51b4324f Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 3 Aug 2024 21:06:41 +0900 Subject: [PATCH 081/104] IAMROOT20 20240803 - Add comments Signed-off-by: Daero Lee --- arch/arm64/mm/init.c | 17 +++++++++++++++++ include/linux/mmzone.h | 6 ++++++ mm/sparse.c | 26 +++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9eb3b6cf55cc9..be3a90e354143 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -192,9 +192,19 @@ static void __init reserve_crashkernel(void) */ static phys_addr_t 
__init max_zone_phys(unsigned int zone_bits) { + /* IAMROOT20 20240803 + * DMA_BIT_MASK(32) = 0xFFFF_FFFF + */ phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits); phys_addr_t phys_start = memblock_start_of_DRAM(); + /* IAMROOT20 20240803 + * phys_start > 4G 인 경우 + * - zone_mask = 0xFFFF_FFFF_FFFF_FFFF + * + * 4G >= phys_start > zone_mask + * - zone_mask = 0xFFFF_FFFF + */ if (phys_start > U32_MAX) zone_mask = PHYS_ADDR_MAX; else if (phys_start > zone_mask) @@ -205,6 +215,13 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) static void __init zone_sizes_init(void) { + /* IAMROOT20 20240803 + * MAX_NR_ZONES 4 + * - config에 따라 MAX_NR_ZONES가 달라짐 + * - default config에서는 ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE + * + * dma32_phys_limit = 0xFFFF_FFFF + */ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c973a16c88ea5..cb2cd977f171d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1921,6 +1921,12 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +/* IAMROOT20 20240803 + * SECTION_MAP_LAST_BIT 4 + * BIT(SECTION_MAP_LAST_BIT) = 0b10000 = 0x10 + * + * SECTION_MAP_MASK = (~(0x10 - 1) = ~0xF = 0xFFFF_FFFF_FFFF_FFF0 + */ #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT diff --git a/mm/sparse.c b/mm/sparse.c index 400b15e0bc689..f03a154b335a5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -338,6 +338,9 @@ static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, struct mem_section_usage *usage, unsigned long flags) { + /* IAMROOT20 20240803 + * SECTION_MAP_MASK = 0xFFFF_FFFF_FFFF_FFF0 + */ ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | SECTION_HAS_MEM_MAP 
| flags; @@ -434,6 +437,10 @@ static void __init check_usemap_section_nr(int nid, */ usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); + /* IAMROOT20 20240803 + * usemap, node data(pgdat)이 다른 section에 할당된 경우 + * if문에서 return하지 않고 다음 코드로 넘어감 + */ if (usemap_snr == pgdat_snr) return; @@ -444,7 +451,11 @@ static void __init check_usemap_section_nr(int nid, old_usemap_snr = usemap_snr; old_pgdat_snr = pgdat_snr; - /* IAMROOT20_END 20240727 */ + /* IAMROOT20_END 20240727 *//* IAMROOT20_START 20240803 */ + /* IAMROOT20 20240803 + * usemap이 할당된 nid, 현재 메모리의 nid가 다른 경우 + * usemap, node data(pgdat)이 할당된 section도 다름 + */ usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr)); if (usemap_nid != nid) { pr_info("node %d must be removed before remove section %ld\n", @@ -457,6 +468,10 @@ static void __init check_usemap_section_nr(int nid, * gather other removable sections for dynamic partitioning. * Just notify un-removable section's number here. 
*/ + /* IAMROOT20 20240803 + * usemap이 할당된 nid, 현재 메모리의 nid가 같은 경우 + * usemap, node data(pgdat)이 할당된 section은 다름 + */ pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n", usemap_snr, pgdat_snr, nid); } @@ -613,10 +628,16 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, goto failed; } check_usemap_section_nr(nid, usage); + /* IAMROOT20 20240803 + * mem_section 구조체 필드들(section_mem_map, usage) 설정 + */ sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, SECTION_IS_EARLY); usage = (void *) usage + mem_section_usage_size(); } + /* IAMROOT20 20240803 + * sparsemap_buf에서 사용하지 않고 남아있는 메모리를 free + */ sparse_buffer_fini(); return; failed: @@ -657,6 +678,9 @@ void __init sparse_init(void) continue; } /* Init node with sections in range [pnum_begin, pnum_end) */ + /* IAMROOT20 20240803 + * 같은 node에 속한 section들을 초기화 + */ sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count); nid_begin = nid; pnum_begin = pnum_end; From 7b83334ef26aafb01f998af1c1623d74bee15800 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 3 Aug 2024 13:12:54 +0000 Subject: [PATCH 082/104] IAMROOT20 20240803 - Add comments to address.c, base.c --- drivers/of/address.c | 15 +++++++++++++++ drivers/of/base.c | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index 87532919c0347..01468f51c2000 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -877,6 +877,13 @@ static int parser_init(struct of_pci_range_parser *parser, { int rlen; + /* IAMROOT20_20240803 + * pna = 현재 노드의 parent의 #address-cells + * na = 현재 노드의 #address-cells 이지만, + * 없으면 #address-cells을 가진 parent까지 거슬러 올라감 + * ns = 현재 노드의 #size-cells 이지만, + * 없으면 #size-cells를 가진 parent까지 거슬러 올라감 + */ parser->node = node; parser->pna = of_n_addr_cells(node); parser->na = of_bus_n_addr_cells(node); @@ -884,6 +891,14 @@ static int parser_init(struct of_pci_range_parser *parser, parser->dma = !strcmp(name, 
"dma-ranges"); parser->bus = of_match_bus(node); + /* IAMROOT20_20240803 + * #address-cells = <3> + * #size-cells = <2> + * + * dma-ranges = <0x02000000 0 0x00000000 0x80000000 0 0x20000000>; + * parser->range = dma-ranges의 property 구조체의 value 값 + * rlen = 24 (byte 단위 길이: 6 cells * sizeof(__be32)) + */ parser->range = of_get_property(node, name, &rlen); if (parser->range == NULL) return -ENOENT; diff --git a/drivers/of/base.c b/drivers/of/base.c index ae2c3763986af..5101b5e44cb9d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1238,6 +1238,7 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) /* If phandle is 0, then it is an empty entry with no arguments. */ it->phandle = be32_to_cpup(it->cur++); + /* IAMROOT20_END 20240803 */ if (it->phandle) { /* From cee455d9d9f88b5706f8c1e7e1aa278f55dfef81 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 3 Aug 2024 13:22:06 +0000 Subject: [PATCH 083/104] IAMROOT20 20240803 - update README.md --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index f7ddb5b6801e6..01f448a7b25f2 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,22 @@ ## History +### 67 주차 +- 2024.08.03 4명(이대로, 임채훈, 조수민, 최경건) +- sparse_init +- zone_sizes_init + - acpi_iort_dma_get_max_cpu_address + - of_dma_get_max_cpu_address + - of_dma_range_parser_init + - for_each_of_range + - of_pci_range_parser_one + - of_translate_dma_address + - __of_get_dma_parent + - of_parse_phandle_with_args + - __of_parse_phandle_with_args + - of_for_each_phandle + - of_phandle_iterator_next + ### 66 주차 - 2024.07.27 4명(이대로, 임채훈, 조수민, 최경건) - sparse_init From 2fc05dfacbc4ad6e9bab6e054998e500ad3db64e Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 10 Aug 2024 13:35:01 +0000 Subject: [PATCH 084/104] IAMROOT20 20240810 - Add comments --- mm/mm_init.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index eeabcb294959b..da6acf2ab4164 100644 --- a/mm/mm_init.c +++ 
b/mm/mm_init.c @@ -446,6 +446,11 @@ static void __init find_zone_movable_pfns_for_nodes(void) restart: /* Spread kernelcore memory as evenly as possible throughout nodes */ kernelcore_node = required_kernelcore / usable_nodes; + + /* IAMROOT20_20240810 + * 노드 안의 range를 돌면서, required_kernelcore을 갱신하고 + * zone_movable_pfn의 시작점을 설정한다. + */ for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; @@ -472,17 +477,35 @@ static void __init find_zone_movable_pfns_for_nodes(void) if (start_pfn >= end_pfn) continue; + /* IAMROOT20_20240810 + * ZONE_NORMAL의 시작 pfn보다 아래에 있는 range의 경우, + * 해당 영역을 제외하여 kernelcore_remaining과 + * required_kernelcore을 다시 계산한다. + * 그리고 start_pfn을 ZONE_NORMAL의 시작점으로 갱신한다. + */ /* Account for what is only usable for kernelcore */ if (start_pfn < usable_startpfn) { unsigned long kernel_pages; kernel_pages = min(end_pfn, usable_startpfn) - start_pfn; + /* IAMROOT20_20240810 + * required_kernelcore : 전체 노드에 필요한 kernelcore의 양 + * kernelcore_remaining : 현재 노드에 배정된 kernelcore의 양 + */ kernelcore_remaining -= min(kernel_pages, kernelcore_remaining); required_kernelcore -= min(kernel_pages, required_kernelcore); + /* IAMROOT20_20240810 + * 만약 현재 range가 ZONE_NORMAL 아래에 완전히 포함되는 경우, + * - zone_movable_pfn을 갱신하고 다음 range로 continue + * + * 만약 현재 range가 ZONE_NORMAL과 겹치는 경우, + * - 아래 조건문을 타지 않고, start_pfn을 + * ZONE_NORMAL의 시작점으로 갱신한다 + */ /* Continue if range is now fully accounted */ if (end_pfn <= usable_startpfn) { @@ -498,6 +521,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) start_pfn = usable_startpfn; } + /* * The usable PFN range for ZONE_MOVABLE is from * start_pfn->end_pfn. Calculate size_pages as the @@ -521,6 +545,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) } } + /* IAMROOT20_END 20240810 */ /* * If there is still required_kernelcore, we do another pass with one * less node in the count. 
This will push zone_movable_pfn[nid] further @@ -546,7 +571,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) out: /* restore the node_state */ - node_states[N_MEMORY] = saved_node_state; + node_states[N_MEMORY] = saved_node_state;OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -577,7 +602,18 @@ struct mminit_pfnnid_cache { unsigned long last_start; unsigned long last_end; int last_nid; -}; +OOOOOOOOOOOOOOOOOOOOOO\OOOOOOOOOO +OOOOOO +OOOOO +OOOOO +OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO +OOOOO +sOdaOOOfOOOOsaOOdOOOfOOOOOasOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOcOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOcOOOOOOOOOOOOOOOO1OOOOOO`OOOOO`OOOOO`OOOOOOOOOOOOOO +OOOOOOOOOOOOOOOOOOOOO +OOOOOOOOOOOOOOOOO +`OOOOOOOOOOOOOO +OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOxOOOOOOxOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO +};OOOOOOOOOOOOOOOOOOOOOOOO static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata; From 05391333896224e96d43c9a463113ba3981857f5 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 10 Aug 2024 13:42:52 +0000 Subject: [PATCH 085/104] IAMROOT20 20240810 - Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 01f448a7b25f2..298efa45c3467 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,11 @@ ## History +### 68 주차 +- 2024.08.10 4명(이대로, 임채훈, 조수민, 최경건) +- free_area_init + - find_zone_movable_pfns_for_nodes + ### 67 주차 - 2024.08.03 4명(이대로, 임채훈, 조수민, 최경건) - sparse_init From 11833f3438ce633b799409eb5f82f94e0db6e0d4 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 10 Aug 2024 13:44:43 +0000 Subject: [PATCH 086/104] IAMROOT20 20240810 - fix typo mistake --- mm/mm_init.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) 
diff --git a/mm/mm_init.c b/mm/mm_init.c index da6acf2ab4164..c9af49f1e0c4c 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -571,7 +571,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) out: /* restore the node_state */ - node_states[N_MEMORY] = saved_node_state;OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO + node_states[N_MEMORY] = saved_node_state; } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -602,18 +602,7 @@ struct mminit_pfnnid_cache { unsigned long last_start; unsigned long last_end; int last_nid; -OOOOOOOOOOOOOOOOOOOOOO\OOOOOOOOOO -OOOOOO -OOOOO -OOOOO -OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO -OOOOO -sOdaOOOfOOOOsaOOdOOOfOOOOOasOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOcOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOcOOOOOOOOOOOOOOOO1OOOOOO`OOOOO`OOOOO`OOOOOOOOOOOOOO -OOOOOOOOOOOOOOOOOOOOO -OOOOOOOOOOOOOOOOO -`OOOOOOOOOOOOOO -OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOxOOOOOOxOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO -};OOOOOOOOOOOOOOOOOOOOOOOO +}; static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata; From bc5ed1b9abd3ceeae7095cd80d420492aa39f959 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Thu, 15 Aug 2024 19:31:01 +0900 Subject: [PATCH 087/104] IAMROOT20 20240810 - Add comments Signed-off-by: Daero Lee --- arch/arm64/mm/init.c | 4 +++ drivers/of/address.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index be3a90e354143..877efde030663 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -229,6 +229,10 @@ static void __init zone_sizes_init(void) #ifdef CONFIG_ZONE_DMA acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); + /* IAMROOT20 20240810 + * of_dma_get_max_cpu_address(NULL) + * - "dma-ranges"의 dma 주소를 cpu 주소로 
변환했을 때, max 주소 값을 return + */ dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL)); zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits); arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); diff --git a/drivers/of/address.c b/drivers/of/address.c index 01468f51c2000..419f16357600e 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -713,10 +713,28 @@ struct device_node *__of_get_dma_parent(const struct device_node *np) struct of_phandle_args args; int ret, index; + /* IAMROOT20 20240810 + * index = of_property_match_string(np, "interconnect-names", "dma-mem"); + * interconnect-names property에서 "dma-mem"이 몇 번째 인지를 return + * + * ex) interconnect-names = "dma-mem", "write"; + * -> index = 0 + */ index = of_property_match_string(np, "interconnect-names", "dma-mem"); if (index < 0) return of_get_parent(np); + /* IAMROOT20 20240810 + * "interconnects" property의 phandle 리스트에서 index번째 entry가 가리키는 node를 찾는다 + * (#interconnect-cells는 각 entry에 붙는 인자 cell 개수를 결정) + * ex) emc: external-memory-controller@2c60000 { + * ... + * #interconnect-cells = <0>; + * ... + * }; + * + * -> interconnects의 index번째 phandle이 위 emc를 가리키면 emc node를 return + */ ret = of_parse_phandle_with_args(np, "interconnects", "#interconnect-cells", index, &args); @@ -926,6 +944,12 @@ EXPORT_SYMBOL_GPL(of_pci_dma_range_parser_init); struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, struct of_pci_range *range) { + /* IAMROOT20 20240810 + * ex) na = 3, ns = 2, pna = 1 + * dma-ranges = <0x02000000 0 0x00000000 0x00000000 0 0x40000000>; + * |---------------------| |--------| |----------| + * na pna ns + */ int na = parser->na; int ns = parser->ns; int np = parser->pna + na + ns; @@ -937,25 +961,48 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, if (!parser->range || parser->range + np > parser->end) return NULL; + /* IAMROOT20 20240810 + * flags 값을 읽어 저장 + * dma-ranges = <0x02000000 0 0x00000000 ... 
>; + * ^--- flags + */ range->flags = parser->bus->get_flags(parser->range); /* A extra cell for resource flags */ if (parser->bus->has_flags) busflag_na = 1; + /* IAMROOT20 20240810 + * bus 주소를 읽음 + * dma-ranges = <0x02000000 0 0x00000000 ... >; + * |----------| + * bus_addr + */ range->bus_addr = of_read_number(parser->range + busflag_na, na - busflag_na); + /* IAMROOT20 20240810 + * cpu 주소를 변환하여 저장 + */ if (parser->dma) range->cpu_addr = of_translate_dma_address(parser->node, parser->range + na); else range->cpu_addr = of_translate_address(parser->node, parser->range + na); + /* IAMROOT20 20240810 + * bus 크기를 저장 + */ range->size = of_read_number(parser->range + parser->pna + na, ns); + /* IAMROOT20 20240810 + * 다음 range로 이동 + */ parser->range += np; /* Now consume following elements while they are contiguous */ + /* IAMROOT20 20240810 + * range가 연속으로 이어져 있는 경우를 처리하기 위함 + */ while (parser->range + np <= parser->end) { u32 flags = 0; u64 bus_addr, cpu_addr, size; @@ -970,12 +1017,20 @@ struct of_pci_range *of_pci_range_parser_one(struct of_pci_range_parser *parser, parser->range + na); size = of_read_number(parser->range + parser->pna + na, ns); + /* IAMROOT20 20240810 + * - flags가 다르거나 + * - bus 주소, cpu 주소가 연속적이지 않은 경우 + * => break + */ if (flags != range->flags) break; if (bus_addr != range->bus_addr + range->size || cpu_addr != range->cpu_addr + range->size) break; + /* IAMROOT20 20240810 + * 주소가 연속적이면 size만 더해줌 + */ range->size += size; parser->range += np; } @@ -1125,9 +1180,16 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np) if (!np) np = of_root; + /* IAMROOT20_START 20240810 */ + /* IAMROOT20 20240810 + * np(현재 node)에서 "dma-ranges" property가 있는지 확인 + */ ranges = of_get_property(np, "dma-ranges", &len); if (ranges && len) { of_dma_range_parser_init(&parser, np); + /* IAMROOT20 20240810 + * parser에 저장된 dma-ranges 영역을 돌면서, cpu_end의 최대값을 구함 + */ for_each_of_range(&parser, &range) if (range.cpu_addr + range.size > cpu_end) cpu_end = 
range.cpu_addr + range.size - 1; @@ -1136,6 +1198,10 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np) max_cpu_addr = cpu_end; } + /* IAMROOT20 20240810 + * np(현재 node)의 자식 node에서 of_dma_get_max_cpu_address 함수를 + * 재귀적으로 호출 + */ for_each_available_child_of_node(np, child) { subtree_max_addr = of_dma_get_max_cpu_address(child); if (max_cpu_addr > subtree_max_addr) From 54b6444c681136c2083a134a12a79336e71b2025 Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Tue, 13 Aug 2024 11:36:54 +0900 Subject: [PATCH 088/104] IAMROOT20 20240813 - Add comment to sparse_init zone_sizes_init --- arch/arm64/include/asm/sparsemem.h | 3 +++ arch/arm64/mm/init.c | 9 +++++++++ include/linux/mmzone.h | 24 +++++++++++++++++++++--- include/linux/page-flags-layout.h | 3 +++ mm/mm_init.c | 4 ++++ mm/page_alloc.c | 4 ++++ mm/sparse.c | 7 +++++-- 7 files changed, 49 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 5f54376210293..7fc1693261d01 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -5,6 +5,9 @@ #ifndef __ASM_SPARSEMEM_H #define __ASM_SPARSEMEM_H +/* IAMROOT20 20240809 + * MAX_PHYSMEM_BITS 48 + */ #define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS /* diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 877efde030663..c8ba15a0c4613 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -62,6 +62,10 @@ EXPORT_SYMBOL(memstart_addr); * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. 
*/ +/* IAMROOT20 20240810 + * arm64_dma_phys_limit 0x1_0000_0000 + * zone_sizes_init() + */ phys_addr_t __ro_after_init arm64_dma_phys_limit; /* Current arm64 boot protocol requires 2MB alignment */ @@ -222,6 +226,11 @@ static void __init zone_sizes_init(void) * * dma32_phys_limit = 0xFFFF_FFFF */ + /* IAMROOT20 20240810 + * max_zone_pfns[ZONE_DMA] = 0x100000 + * max_zone_pfns[ZONE_DMA32] = 0x100000 + * max_zone_pfns[ZONE_NORMAL] = max_pfn + */ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cb2cd977f171d..d12b86a7e74ba 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1723,6 +1723,10 @@ static inline bool movable_only_nodes(nodemask_t *nodes) * PA_SECTION_SHIFT physical address to/from section number * PFN_SECTION_SHIFT pfn to/from section number */ +/* IAMROOT20 20240809 + * PA_SECTION_SHIFT 27 + * PFN_SECTION_SHIFT 15 = 27 - 12 + */ #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) @@ -1731,12 +1735,15 @@ static inline bool movable_only_nodes(nodemask_t *nodes) * = 48 - 27 * 전체 PA에서 가질 수 있는 섹션의 개수 */ +/* IAMROOT20 20240809 + * NR_MEM_SECTIONS SIZE_2M + */ #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) /* IAMROOT20 20240720 * ex) 4K인 경우, - * PAGES_PER_SECTION (1 << PFN_SECTION_SHIFT) = (1 << 15) - * PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) = 0xFFFF_FFFF_FFFF_8000 + * PAGES_PER_SECTION 0x8000 (1 << PFN_SECTION_SHIFT) = (1 << 15) + * PAGE_SECTION_MASK 0xFFFF_FFFF_FFFF_8000 (~0x7fff) */ #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) @@ -1836,6 +1843,9 @@ struct mem_section { }; #ifdef CONFIG_SPARSEMEM_EXTREME +/* IAMROOT20 20240809 + * SECTIONS_PER_ROOT 256 + */ #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) #else #define SECTIONS_PER_ROOT 1 @@ -1844,7 
+1854,7 @@ struct mem_section { /* IAMROOT20 20240720 * ex) 4K, PA=48 인 경우 * NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) - * = 2^21 / 2^8 = 2^13(8096) + * = 2^21 / 2^8 = 2^13 = 8192 * * NR_MEM_SECTIONS (1 << SECTIONS_SHIFT) = (1 << 21) * SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) = 21 @@ -1853,6 +1863,11 @@ struct mem_section { * * SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) = 4K / 16 = 256(2^8) */ +/* IAMROOT20 20240809 + * SECTION_NR_TO_ROOT(sec) (sec / 256) + * NR_SECTION_ROOTS 8192 + * SECTION_ROOT_MASK 0x00ff + */ #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT) #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) @@ -1882,6 +1897,9 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) if (!mem_section || !mem_section[root]) return NULL; #endif + /* IAMROOT20 20240809 + * SECTION_ROOT_MASK 0x00ff + */ return &mem_section[root][nr & SECTION_ROOT_MASK]; } extern size_t mem_section_usage_size(void); diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d79818dc0651..069a9f68c6318 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -28,6 +28,9 @@ #ifdef CONFIG_SPARSEMEM #include +/* IAMROOT20 20240809 + * SECTIONS_SHIFT 21 + */ #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) #else #define SECTIONS_SHIFT 0 diff --git a/mm/mm_init.c b/mm/mm_init.c index c9af49f1e0c4c..0aa2ae01b8037 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -215,6 +215,10 @@ static int __init mm_sysfs_init(void) } postcore_initcall(mm_sysfs_init); +/* IAMROOT20 20240810 + * free_area_init()에서 초기화함 + * + */ static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata; diff --git a/mm/page_alloc.c 
b/mm/page_alloc.c index 9a8045040a562..ff93acf957324 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -377,6 +377,10 @@ int watermark_scale_factor = 10; bool mirrored_kernelcore __initdata_memblock; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ +/* IAMROOT20 20240810 + * movable_zone = ZONE_NORMAL + * find_usable_zone_for_movable()에서 설정됨. + */ int movable_zone; EXPORT_SYMBOL(movable_zone); diff --git a/mm/sparse.c b/mm/sparse.c index f03a154b335a5..668670842eb6f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -146,7 +146,7 @@ static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, * MAX_PHYSMEM_BITS = 48 * PAGE_SHIFT = 12 * - * max_sparsemem_pfn = 1 << 36 + * max_sparsemem_pfn = 1 << 36 = 0x10_0000_0000 */ unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); @@ -252,11 +252,14 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en /* IAMROOT20 20240720 * ex) 4K, PA=48인 경우, - * NR_SECTION_ROOTS = 2^13(8096) + * NR_SECTION_ROOTS = 2^13(8192) * * INTERNODE_CACHE_SHIFT = 6 * align = 64 */ + /* IAMROOT20 20240809 + * size = 8 * 8192 = 65536 (sizeof(struct mem_section *) = 8, 포인터 크기) + */ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_alloc(size, align); From dfdc0a9898805f032d648344489d69b406ffb0bf Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Sat, 17 Aug 2024 21:30:03 +0900 Subject: [PATCH 089/104] IAMROOT20 20240817 - Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 298efa45c3467..8edd781d4c08f 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,10 @@ ## History +### 69 주차 +- 2024.08.10 1명(임채훈) +- [문C 블로그 zone_sizes_init](http://jake.dothome.co.kr/free_area_init_node) 정독 + ### 68 주차 - 2024.08.10 4명(이대로, 임채훈, 조수민, 최경건) - free_area_init From 453cf0a1b85496fc900ac7eb63d89e48ff4de66d Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 24 Aug 2024 22:06:11 +0900 Subject: 
[PATCH 090/104] IAMROOT20 2024.08.24 - Add comments Signed-off-by: Daero Lee --- README.md | 6 +++++ include/linux/minmax.h | 8 +++++++ mm/mm_init.c | 52 ++++++++++++++++++++++++++++++++++++++++-- mm/sparse.c | 10 ++++++++ 4 files changed, 74 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8edd781d4c08f..796b2ab70046d 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,12 @@ ## History +### 70 주차 +- 2024.08.24 3명(이대로, 임채훈, 최경건) +- free_area_init + - free_area_init_node + - free_area_init_core + ### 69 주차 - 2024.08.10 1명(임채훈) - [문C 블로그 zone_sizes_init](http://jake.dothome.co.kr/free_area_init_node) 정독 diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 396df1121bffb..c1293b90e44cc 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -108,6 +108,14 @@ * This macro does strict typechecking of @lo/@hi to make sure they are of the * same type as @val. See the unnecessary pointer comparisons. */ +/* IAMROOT20 20240824 + * clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + * - val이 [lo, hi] 영역을 벗어나면 lo로 올리거나 hi로 내림 + * - val이 [lo, hi] 사이에 있는 경우 변경 x + * ex) clamp(1, 10, 20) = 10 + * clamp(22, 10, 20) = 20 + * clamp(15, 10, 20) = 15 + */ #define clamp(val, lo, hi) __careful_clamp(val, lo, hi) /* diff --git a/mm/mm_init.c b/mm/mm_init.c index 0aa2ae01b8037..f6d08cd140819 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -217,7 +217,7 @@ postcore_initcall(mm_sysfs_init); /* IAMROOT20 20240810 * free_area_init()에서 초기화함 - * + * - [ZONE_MOVABLE]은 설정하지 않음 */ static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata; static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata; @@ -339,6 +339,10 @@ static void __init find_usable_zone_for_movable(void) * memory. 
When they don't, some nodes will have more kernelcore than * others */ +/* IAMROOT20 20240824 + * zone_movable_pfn[]에 movable zone의 start pfn 설정 + * - 커널 파라미터 movable_node, kernelcore, movablecore 참조 + */ static void __init find_zone_movable_pfns_for_nodes(void) { int i, nid; @@ -549,7 +553,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) } } - /* IAMROOT20_END 20240810 */ + /* IAMROOT20_END 20240810 *//* IAMROOT20_START 20240824 */ /* * If there is still required_kernelcore, we do another pass with one * less node in the count. This will push zone_movable_pfn[nid] further @@ -1146,18 +1150,31 @@ static void __init adjust_zone_range_for_zone_movable(int nid, /* Only adjust if ZONE_MOVABLE is on this node */ if (zone_movable_pfn[nid]) { /* Size ZONE_MOVABLE */ + /* IAMROOT20 20240824 + * movable_zone = ZONE_NORMAL + */ if (zone_type == ZONE_MOVABLE) { *zone_start_pfn = zone_movable_pfn[nid]; *zone_end_pfn = min(node_end_pfn, arch_zone_highest_possible_pfn[movable_zone]); /* Adjust for ZONE_MOVABLE starting within this range */ + /* IAMROOT20 20240824 + * zone_type이 ZONE_MOVABLE이 아니고, + * zone_start_pfn < zone_movable_pfn < zone_end_pfn 인 경우 + * -> movable zone을 분리한다 + */ } else if (!mirrored_kernelcore && *zone_start_pfn < zone_movable_pfn[nid] && *zone_end_pfn > zone_movable_pfn[nid]) { *zone_end_pfn = zone_movable_pfn[nid]; /* Check if this whole range is within ZONE_MOVABLE */ + /* IAMROOT20 20240824 + * zone_type이 ZONE_MOVABLE이 아닌데, ZONE_MOVABLE에 포함되는 경우 + * (zone_start_pfn > zone_movable_pfn) + * -> 해당 영역 size를 0으로 만듦 + */ } else if (*zone_start_pfn >= zone_movable_pfn[nid]) *zone_start_pfn = *zone_end_pfn; } @@ -1267,11 +1284,17 @@ static unsigned long __init zone_spanned_pages_in_node(int nid, /* Get the start and end of the zone */ *zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); *zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); + /* IAMROOT20 20240824 + * ZONE_MOVABLE을 고려하여 zone_start_pfn, zone_end_pfn을 조정한다 + */ 
adjust_zone_range_for_zone_movable(nid, zone_type, node_start_pfn, node_end_pfn, zone_start_pfn, zone_end_pfn); /* Check that this node has pages within the zone's required range */ + /* IAMROOT20 20240824 + * zone이 node 영역을 벗어난 경우, return 0 + */ if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn) return 0; @@ -1296,11 +1319,17 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, unsigned long spanned, absent; unsigned long size, real_size; + /* IAMROOT20 20240824 + * node 영역 안에 있는 zone영역을 구함(hole 포함) + */ spanned = zone_spanned_pages_in_node(pgdat->node_id, i, node_start_pfn, node_end_pfn, &zone_start_pfn, &zone_end_pfn); + /* IAMROOT20 20240824 + * (node 영역 안에 있는) zone 영역에서 hole의 크기를 구함 + */ absent = zone_absent_pages_in_node(pgdat->node_id, i, node_start_pfn, node_end_pfn); @@ -1340,6 +1369,10 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, * populated regions may not be naturally aligned on page boundary. * So the (present_pages >> 4) heuristic is a tradeoff for that. */ + /* IAMROOT20 20240824 + * spanned_pages가 present_pages 보다 1/16(6.25%) 넘게 큰 경우, + * pages를 present_pages로 변경 + */ if (spanned_pages > present_pages + (present_pages >> 4) && IS_ENABLED(CONFIG_SPARSEMEM)) pages = present_pages; @@ -1576,6 +1609,9 @@ static void __init free_area_init_core(struct pglist_data *pgdat) * is used by this zone for memmap. 
This affects the watermark * and per-cpu initialisations */ + /* IAMROOT20 20240824 + * 현재 zone의 모든 page의 디스크립터(struct page) 크기의 합을 구함(page 단위) + */ memmap_pages = calc_memmap_size(size, freesize); if (!is_highmem_idx(j)) { if (freesize >= memmap_pages) { @@ -1588,6 +1624,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat) zone_names[j], memmap_pages, freesize); } + /* IAMROOT20_END 20240824 */ /* Account for reserved pages */ if (j == 0 && freesize > dma_reserve) { freesize -= dma_reserve; @@ -1741,6 +1778,9 @@ static void __init free_area_init_node(int nid) calculate_node_totalpages(pgdat, start_pfn, end_pfn); + /* IAMROOT20 20240824 + * FLATMEM인 경우여서 분석x + */ alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); @@ -1872,6 +1912,10 @@ void __init free_area_init(unsigned long *max_zone_pfn) * enable future "sub-section" extensions of the memory map. */ pr_info("Early memory node ranges\n"); + /* IAMROOT20 20240824 + * 모든 mem range를 돌면서, + * subsection 단위(2M)로 mem_section->usage->subsection_map에 set + */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, (u64)start_pfn << PAGE_SHIFT, @@ -1885,6 +1929,10 @@ void __init free_area_init(unsigned long *max_zone_pfn) for_each_node(nid) { pg_data_t *pgdat; + /* IAMROOT20 20240824 + * node online이 아닌 경우 -> memory-less node로 설정 + * node online인 경우 -> N_MEMORY 설정, check_for_memory() 호출 + */ if (!node_online(nid)) { pr_info("Initializing node %d as memoryless\n", nid); diff --git a/mm/sparse.c b/mm/sparse.c index 668670842eb6f..69df3b181c695 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -204,6 +204,9 @@ static inline unsigned long first_present_section_nr(void) static void subsection_mask_set(unsigned long *map, unsigned long pfn, unsigned long nr_pages) { + /* IAMROOT20 20240824 + * subsection의 start, end 인덱스를 구해서, subsection_map에 set + */ int idx = subsection_map_index(pfn); int end = subsection_map_index(pfn + nr_pages - 1); @@ 
-222,9 +225,16 @@ void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) struct mem_section *ms; unsigned long pfns; + /* IAMROOT20 20240824 + * pfns : 현재 section의 pfn 수 + * pfn : 현재 section의 start pfn + */ pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); ms = __nr_to_section(nr); + /* IAMROOT20 20240824 + * subsection 단위로(2M) mem_section->usage->subsection_map에 표시 + */ subsection_mask_set(ms->usage->subsection_map, pfn, pfns); pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, From ce9a961fee6393c78b86382103a9aa61bfe91f36 Mon Sep 17 00:00:00 2001 From: Leem ChaeHoon Date: Sat, 31 Aug 2024 15:06:35 +0900 Subject: [PATCH 091/104] IAMROOT20 2024.08.24 Add additional comments --- drivers/base/arch_numa.c | 4 ++++ mm/page_alloc.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 17cc3268605d8..fb2c0a22220a3 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -15,6 +15,10 @@ #include +/* IAMROOT20 20240824 + * setup_arch -> bootmem_init -> arch_numa_init -> numa_init -> + * numa_register_nodes -> setup_node_data + */ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); /* IAMROOT20 20240525 diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ff93acf957324..6e896cac1a32a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -388,6 +388,9 @@ EXPORT_SYMBOL(movable_zone); /* IAMROOT20 20240525 * nr_node_ids = 16 */ +/* IAMROOT20 20240824 + * setup_nr_node_ids() + */ unsigned int nr_node_ids __read_mostly = MAX_NUMNODES; unsigned int nr_online_nodes __read_mostly = 1; EXPORT_SYMBOL(nr_node_ids); From 6ec3c84fcfa456d48104e6a2ac3d1f14a4e291a9 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 31 Aug 2024 13:11:21 +0000 Subject: [PATCH 092/104] IAMROOT20 20240831 - Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 796b2ab70046d..e7862d856fb29 100644 
--- a/README.md +++ b/README.md @@ -15,6 +15,12 @@ ## History +### 71 주차 +- 2024.08.31 2명(임채훈, 최경건) +- free_area_init_core + - zone_init_internals + - set_pageblock_order + ### 70 주차 - 2024.08.24 3명(이대로, 임채훈, 최경건) - free_area_init From 2f3c47bb847515d9dd8e49ce9793f379d3f5e396 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 31 Aug 2024 13:11:34 +0000 Subject: [PATCH 093/104] IAMROOT20 20240831 - Add comments --- mm/mm_init.c | 9 ++++++++- mm/page_alloc.c | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index f6d08cd140819..098e298bf92bb 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1624,13 +1624,19 @@ static void __init free_area_init_core(struct pglist_data *pgdat) zone_names[j], memmap_pages, freesize); } - /* IAMROOT20_END 20240824 */ + /* IAMROOT20_END 20240824 */ /* IAMROOT20_START 20240831 */ /* Account for reserved pages */ if (j == 0 && freesize > dma_reserve) { freesize -= dma_reserve; pr_debug(" %s zone: %lu pages reserved\n", zone_names[0], dma_reserve); } + /* IAMROOT20 20240831 + * 위에서 freesize를 모두 계산하고, + * highmem이 아닌 zone인 경우, 전역 변수 nr_kernel_pages에 freesize를 더한다. + * + * memmap한 페이지와 dma_reserve를 제외한 freesize를 nr_all_pages에 더한다. 
+ */ if (!is_highmem_idx(j)) nr_kernel_pages += freesize; /* Charge for highmem memmap if there are enough kernel pages */ @@ -1648,6 +1654,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat) if (!size) continue; + /* IAMROOT20_END 20240831 */ set_pageblock_order(); setup_usemap(zone); init_currently_empty_zone(zone, zone->zone_start_pfn, size); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e896cac1a32a..295c12a245201 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5938,6 +5938,18 @@ void __ref build_all_zonelists(pg_data_t *pgdat) #endif } +/* IAMROOT20 20240831 + * zone_managed_pages = 4GB(2 ^ 20 pages)로 가정할 경우, + * batch = min(2 ^ 20 >> 10, 2 ^ 20 / 2 ^ 12) + * batch = min(1024, 256) + * batch = 256 + * batch = 64 (batch /= 4) + * batch = rounddown_pow_of_two(64 + 32) - 1 + * (1UL << (fls(96) - 1)) - 1 + * (1UL << (7 - 1)) - 1 + * (64 - 1) + * batch = 63 + */ static int zone_batchsize(struct zone *zone) { #ifdef CONFIG_MMU From 89f3830102c151370b3618eed25c231a4eefb855 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 7 Sep 2024 13:15:54 +0000 Subject: [PATCH 094/104] IAMROOT20 20240907 - Update README.md --- README.md | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e7862d856fb29..e440d48397000 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,11 @@ ## History +### 72 주차 +- 2024.09.07 2명(임채훈, 최경건) +- free_area_init_node + - free_area_init_core + ### 71 주차 - 2024.08.31 2명(임채훈, 최경건) - free_area_init_core @@ -40,17 +45,6 @@ - 2024.08.03 4명(이대로, 임채훈, 조수민, 최경건) - sparse_init - zone_sizes_init - - acpi_iort_dma_get_max_cpu_address - - of_dma_get_max_cpu_address - - of_dma_range_parser_init - - for_each_of_range - - of_pci_range_parser_one - - of_translate_dma_address - - __of_get_dma_parent - - of_parse_phandle_with_args - - __of_parse_phandle_with_args - - of_for_each_phandle - - of_phandle_iterator_next ### 66 주차 - 2024.07.27 4명(이대로, 임채훈, 조수민, 최경건) From 
9072f56825a69a6131d4c6dd8087bb1bd9f564d5 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 7 Sep 2024 13:16:10 +0000 Subject: [PATCH 095/104] IAMROOT20 20240907 - Add comments --- mm/mm_init.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 098e298bf92bb..b9759abc4cf01 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1473,6 +1473,13 @@ void __meminit init_currently_empty_zone(struct zone *zone, * round what is now in bits to nearest long in bits, then return it in * bytes. */ +/* IAMROOT20 20240907 + * zone_start_pfn이 페이지 블록 경계에 맞도록 조정한 값이 zonesize. + * zonesize를 바탕으로 전체 페이지 개수를 구한 다음, 4bit를 곱해 + * usemap을 담을 공간을 bit 단위로 계산한다. + * 그리고 8 bytes(64) 단위로 올림하여, usemap을 담을 공간을 넉넉하게 계산하고, + * 이를 bytes 단위로 변환 후, 그 갯수를 반환한다. + */ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize) { unsigned long usemapsize; @@ -1486,6 +1493,10 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l return usemapsize / 8; } +/* IAMROOT20 20240907 + * __ref : .ref.text 섹션에 함수를 만들되, inline이 안되게 한다. + * 커널 초기화 이후에도 참조될 수 있게 된다. 
+ */ static void __ref setup_usemap(struct zone *zone) { unsigned long usemapsize = usemap_size(zone->zone_start_pfn, @@ -1654,7 +1665,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat) if (!size) continue; - /* IAMROOT20_END 20240831 */ + /* IAMROOT20_END 20240831 */ /* IAMROOT20_START 20240907 */ set_pageblock_order(); setup_usemap(zone); init_currently_empty_zone(zone, zone->zone_start_pfn, size); @@ -1791,6 +1802,7 @@ static void __init free_area_init_node(int nid) alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); + /* IAMROOT20_END 20240907 */ free_area_init_core(pgdat); lru_gen_init_pgdat(pgdat); } From ddc47d7880a0d738ef94cf0a0dea3a05420ef7a5 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 21 Sep 2024 13:04:52 +0000 Subject: [PATCH 096/104] IAMROOT20 20240921 - Add comments --- mm/mm_init.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index b9759abc4cf01..8783cdbb9a496 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -711,6 +711,7 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) nr_initialised = 0; } + /* IAMROOT20_END 20240921 */ /* Always populate low zones for address-constrained allocations */ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; @@ -907,6 +908,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone * function. They do not exist on hotplugged memory. */ if (context == MEMINIT_EARLY) { + /* IAMROOT20 20240921 + * mirrored_kernelcore이고 ZONE_MOVABLE인 경우만 true + */ if (overlap_memmap_init(zone, &pfn)) continue; if (defer_init(nid, pfn, zone_end_pfn)) { @@ -1802,11 +1806,15 @@ static void __init free_area_init_node(int nid) alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); - /* IAMROOT20_END 20240907 */ + /* IAMROOT20_END 20240907 */ /* IAMROOT20_START 20240921 */ free_area_init_core(pgdat); lru_gen_init_pgdat(pgdat); } +/* IAMROOT20 20240921 + * N_MEMORY : 일반적인 물리 메모리 블록. 
+ * N_NORMAL_MEMORY : CPU에서 접근 가능한 물리 메모리 블록. DMA는 제외. + */ /* Any regular or high memory on that node ? */ static void check_for_memory(pg_data_t *pgdat, int nid) { From a857d15de51ca65c070c86870050dedaedf260ab Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 21 Sep 2024 13:10:19 +0000 Subject: [PATCH 097/104] IAMROOT20 20240907 - Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index e440d48397000..a4c2c5ddc1602 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,13 @@ ## History +### 73 주차 +- 2024.09.21 2명(임채훈, 최경건) +- free_area_init + - memmap_init + - memmap_init_range + - defer_init + ### 72 주차 - 2024.09.07 2명(임채훈, 최경건) - free_area_init_node From 69c90caf70c9a05c91ae362d3dac135310cc1350 Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Thu, 3 Oct 2024 12:36:15 +0900 Subject: [PATCH 098/104] IAMROOT20 20240928 - Add comments Signed-off-by: Daero Lee --- README.md | 10 ++++++++++ mm/mm_init.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/README.md b/README.md index a4c2c5ddc1602..82476ebb1b045 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,16 @@ ## History +### 74 주차 +- 2024.09.28 2명(이대로, 임채훈) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - memmap_init_range + ### 73 주차 - 2024.09.21 2명(임채훈, 최경건) - free_area_init diff --git a/mm/mm_init.c b/mm/mm_init.c index 8783cdbb9a496..70abf6a73431f 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -713,6 +713,10 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) /* IAMROOT20_END 20240921 */ /* Always populate low zones for address-constrained allocations */ + /* IAMROOT20 20240928 + * zone end_pfn < node end_pfn -> return false + * node에서 마지막 zone(ZONE_NORMAL)의 경우에만, if문을 통과함 + */ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; @@ -722,6 +726,10 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) * We start only with one section of 
pages, more pages are added as * needed until the rest of deferred pages are initialized. */ + /* IAMROOT20 20240928 + * defer init이 필요한 page(pfn) 개수가 PAGES_PER_SECTION(0x8000)을 넘으면, + * SECTION 단위로 정렬되어 있는지 확인하고 first_deferred_pfn에 저장 + */ nr_initialised++; if ((nr_initialised > PAGES_PER_SECTION) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { @@ -918,6 +926,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone break; } } + /* IAMROOT20_END 20240928 */ page = pfn_to_page(pfn); __init_single_page(page, pfn, zone, nid); @@ -946,6 +955,11 @@ static void __init memmap_init_zone_range(struct zone *zone, unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages; int nid = zone_to_nid(zone), zone_id = zone_idx(zone); + /* IAMROOT20_START 20240928 */ + /* IAMROOT20 20240928 + * start_pfn ~ end_pfn + * - 각 memory pfn range에서 zone pfn range로 clamp 된 영역 + */ start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn); end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn); From 06bda015f6821ee58b0de30f515bdcea7dd81dea Mon Sep 17 00:00:00 2001 From: Daero Lee Date: Sat, 12 Oct 2024 17:35:03 +0900 Subject: [PATCH 099/104] IAMROOT20 2024.10.05 - Add comments Signed-off-by: Daero Lee --- README.md | 9 +++++++++ include/linux/mm.h | 4 ++++ include/linux/mmzone.h | 17 +++++++++++++++++ mm/mm_init.c | 10 ++++++++++ mm/page_alloc.c | 3 +++ 5 files changed, 43 insertions(+) diff --git a/README.md b/README.md index 82476ebb1b045..b5b4eff899ac9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,15 @@ ## History +### 75 주차 +- 2024.10.05 2명(이대로, 임채훈) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + ### 74 주차 - 2024.09.28 2명(이대로, 임채훈) - setup_arch diff --git a/include/linux/mm.h b/include/linux/mm.h index 538a2e606aa59..5d0abc4f1886c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1569,6 +1569,10 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) } #endif +/* 
IAMROOT20 20241005 + * SPARSEMEM 이지만 VMEMMAP을 사용하지 않는 경우에만, + * page flags에 SECTION 정보를 저장 + */ #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d12b86a7e74ba..08de3535b4a62 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1022,6 +1022,12 @@ static inline bool zone_is_empty(struct zone *zone) */ /* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */ +/* IAMROOT20 20241005 + * SECTIONS_PGOFF 64 (vmemmap 사용, vmemmap 사용 x : 43) + * NODES_PGOFF 60 + * ZONES_PGOFF 58 + * LAST_CPUPID_PGOFF 42 + */ #define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) @@ -1054,6 +1060,17 @@ static inline bool zone_is_empty(struct zone *zone) #define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0)) +/* IAMROOT20 20241005 + * ZONES_WIDTH 2 + * NODES_WIDTH 4 + * SECTIONS_WIDTH 0 (vmemmap 사용, vmemmap 사용 x : 21) + * LAST_CPUPID_SHIFT 16 + * + * ZONES_MASK 0x3 + * NODES_MASK 0xF + * SECTIONS_MASK 0x1F_FFFF (vmemmap 사용 x, vmemmap 사용 : 0x0) + * LAST_CPUPID_MASK 0xFFFF + */ #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) diff --git a/mm/mm_init.c b/mm/mm_init.c index 70abf6a73431f..7f699946cf30c 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -695,6 +695,11 @@ static inline bool __meminit early_page_initialised(unsigned long pfn) * Returns true when the remaining initialisation should be deferred until * later in the boot cycle when it can be parallelised. 
*/ +/* IAMROOT20 20241005 + * 나중에 처리할 pfn을 구해서 first_deferred_pfn에 저장 + * - node의 마지막 zone(보통은 ZONE_NORMAL)일 때만, + * 처음 한 SECTION(128M)을 초기화하고 나머지는 나중에 처리(defer) + */ static bool __meminit defer_init(int nid, unsigned long pfn, unsigned long end_pfn) { @@ -921,6 +926,7 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone */ if (overlap_memmap_init(zone, &pfn)) continue; + /* IAMROOT20_START 20241005 */ if (defer_init(nid, pfn, zone_end_pfn)) { deferred_struct_pages = true; break; @@ -938,6 +944,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone * such that unmovable allocations won't be scattered all * over the place during system boot. */ + /* IAMROOT20 20241005 + * pageblock 단위(512개 page)로 migratetype을 지정 + */ if (pageblock_aligned(pfn)) { set_pageblock_migratetype(page, migratetype); cond_resched(); @@ -968,6 +977,7 @@ static void __init memmap_init_zone_range(struct zone *zone, memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn, zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); + /* IAMROOT20_END 20241005 */ if (*hole_pfn < start_pfn) init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 295c12a245201..ca544eb5f7193 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -444,6 +444,9 @@ static inline unsigned long *get_pageblock_bitmap(const struct page *page, static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM + /* IAMROOT20 20241005 + * 한 SECTION(128M) 안에서 몇 번 째 pfn 인지를 구함 + */ pfn &= (PAGES_PER_SECTION-1); #else pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn); From 4325fe26d848254dae6245a9e599252f75321520 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 12 Oct 2024 13:13:12 +0000 Subject: [PATCH 100/104] IAMROOT20 20241012 - Add comments --- include/linux/mmzone.h | 1 + mm/mm_init.c | 7 ++++++- tools/testing/memblock/linux/mmzone.h | 4 ++++ 3 files 
changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 08de3535b4a62..5621038e62cb2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2079,6 +2079,7 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; + /* IAMROOT20_END 20241012 */ ms = __pfn_to_section(pfn); if (!valid_section(ms)) return 0; diff --git a/mm/mm_init.c b/mm/mm_init.c index 7f699946cf30c..3bbac614e89ad 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -855,6 +855,11 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * zone/node above the hole except for the trailing pages in the last * section that will be appended to the zone/node below. */ +/* IAMROOT20 20241012 + * spfn : hole의 시작 주소 + * epfn : hole의 끝 주소 + * pgcnt : hole로 설정할 수 있는 valid한 page의 개수 + */ static void __init init_unavailable_range(unsigned long spfn, unsigned long epfn, int zone, int node) @@ -977,7 +982,7 @@ static void __init memmap_init_zone_range(struct zone *zone, memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn, zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); - /* IAMROOT20_END 20241005 */ + /* IAMROOT20_END 20241005 */ /* IAMROOT20_START 20241012 */ if (*hole_pfn < start_pfn) init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid); diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 134f8eab0768f..e6c990f50a952 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -23,6 +23,10 @@ enum zone_type { #define pageblock_order MAX_ORDER #define pageblock_nr_pages BIT(pageblock_order) #define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +/* IAMROOT20 20241012 + * pageblock_nr_pages : 1024 + * pageblock_start_pfn : pfn 주소의 하위 10개의 비트를 0으로 설정 -> align down +*/ #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) struct zone { From 
a0cd47478c7a69f00f46205c3edaf3b8d4963eb1 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 12 Oct 2024 13:15:45 +0000 Subject: [PATCH 101/104] IAMROOT20 20241012 - Update README --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index b5b4eff899ac9..fe14b7d2764b9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,17 @@ ## History +### 76 주차 +- 2024.10.05 2명(임채훈, 최경건) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - init_unavailable_range + - pfn_valid + ### 75 주차 - 2024.10.05 2명(이대로, 임채훈) - setup_arch From b22535d8dd717575f41aae2e5db5dc6aadcd13f7 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 12 Oct 2024 13:17:31 +0000 Subject: [PATCH 102/104] IAMROOT20 20241012 - Fix README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fe14b7d2764b9..2bb63b7a14c85 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ ## History ### 76 주차 -- 2024.10.05 2명(임채훈, 최경건) +- 2024.10.12 2명(임채훈, 최경건) - setup_arch - bootmem_init - zone_sizes_init From be2b5d3de288d55bcd08b26c11fea06d8a0b0ec4 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 19 Oct 2024 13:13:38 +0000 Subject: [PATCH 103/104] IAMROOT20 20241019 - Add comments --- include/linux/mmzone.h | 6 +++++- mm/mm_init.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5621038e62cb2..f3a3c9cac7887 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2033,6 +2033,10 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) extern unsigned long __highest_present_section_nr; +/* IAMROOT20 20241019 + * ~(PAGE_SECTION_MASK): 0x7fff = 2^15 - 1 = PAGES_PER_SECTION - 1 + * PAGES_PER_SUBSECTION: 2^9 + */ static inline int subsection_map_index(unsigned long pfn) { return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION; @@ -2079,7 +2083,7 @@ static inline int 
pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - /* IAMROOT20_END 20241012 */ + /* IAMROOT20_END 20241012 */ /* IAMROOT20_START 20241019 */ ms = __pfn_to_section(pfn); if (!valid_section(ms)) return 0; diff --git a/mm/mm_init.c b/mm/mm_init.c index 3bbac614e89ad..ef82a19f49056 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -872,6 +872,7 @@ static void __init init_unavailable_range(unsigned long spfn, pfn = pageblock_end_pfn(pfn) - 1; continue; } + /* IAMROOT20_END 20241019 */ __init_single_page(pfn_to_page(pfn), pfn, zone, node); __SetPageReserved(pfn_to_page(pfn)); pgcnt++; From d71d1ef087577301355d921f060740efe6bdd580 Mon Sep 17 00:00:00 2001 From: sideseal Date: Sat, 19 Oct 2024 13:14:26 +0000 Subject: [PATCH 104/104] IAMROOT20 20241019 - Update README.md --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 2bb63b7a14c85..ac75f570b634f 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,16 @@ ## History +### 77 주차 +- 2024.10.19 2명(임채훈, 최경건) +- setup_arch + - bootmem_init + - zone_sizes_init + - free_area_init + - memmap_init + - memmap_init_zone_range + - init_unavailable_range + ### 76 주차 - 2024.10.12 2명(임채훈, 최경건) - setup_arch