List Info

Thread: Page table shuffling




Page table shuffling
user name
2006-06-21 03:17:07
Hi,

Prompted by kexec [1], I decided to revive a simplified version of
this patch.  This should make no functional changes, but should make
it slightly easier for fresh eyes to navigate the IA64 VM layer.

Against 2.6.17 and tested on the simulator (with a slight fix [2]) and
ZX1.

Thanks,

-i

[1] http://www.gelato.unsw.edu.au/archives/linux-ia64/0606/18542.html
[2] http://www.gelato.unsw.edu.au/archives/linux-ia64/0606/18550.html
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
pgtable.h - reflow macros
user name
2006-06-21 03:17:13
Make the macros for each level consistent, and document them.  Previous
criticism has been that upper layers will never be more than a
pointer, so why bother, but since it all falls out in the
preprocessor I don't think it matters.

Signed-Off-By: Ian Wienand <ianw@gelato.unsw.edu.au>

---

 pgtable.h |  109
++++++++++++++++++++++++++++++++++++++++--------------------
--
 1 file changed, 72 insertions(+), 37 deletions(-)

--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -84,54 +84,72 @@
 #define __DIRTY_BITS		_PAGE_ED | __DIRTY_BITS_NO_ED
 
 /*
- * How many pointers will a page table level hold expressed
in shift
+ * Overall page table macro explanations
+ * =====================================
+ * level_ENTRY_BITS : log2 of the width in bytes of one entry at this level.
+ *                    i.e. log2(width of entry)
+ * level_INDEX_BITS : Those bits in a page full of entries that identify unique entries.
+ *                    i.e. log2(PAGE_SIZE / (width of entry))
+ * PTRS_PER_level   : The number of entries of level a page can hold
+ *                    i.e. 2^level_INDEX_BITS
+ * level_SHIFT      : number of bits this level maps (cumulative with lower levels)
+ * level_SIZE       : how much address space an entry at this level maps
+ *                    i.e. 2^level_SHIFT
+ * level_MASK       : mask that clears the address bits below level_SHIFT, i.e. ~(level_SIZE - 1)
  */
-#define PTRS_PER_PTD_SHIFT	(PAGE_SHIFT-3)
 
 /*
  * Definitions for fourth level:
+ * A PTD is a page full of PTE entries
  */
-#define PTRS_PER_PTE	(__IA64_UL(1) <<
(PTRS_PER_PTD_SHIFT))
+#define PTD_ENTRY_BITS	3
+#define PTD_INDEX_BITS	(PAGE_SHIFT - PTD_ENTRY_BITS)
+#define PTRS_PER_PTD	(__IA64_UL(1) << PTD_INDEX_BITS)
+/* some other places in the kernel expect PTRS_PER_PTE to
be defined
+ * to the number of ptes in a page; we define it here but
try not to
+ * use it to avoid further confusion
+ */
+#define PTRS_PER_PTE	PTRS_PER_PTD
+#define PTD_SHIFT	PAGE_SHIFT
 
 /*
- * Definitions for third level:
- *
- * PMD_SHIFT determines the size of the area a third-level
page table
- * can map.
+ * Definitions for third level (middle)
  */
-#define PMD_SHIFT	(PAGE_SHIFT + (PTRS_PER_PTD_SHIFT))
-#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_ENTRY_BITS	3
+#define PMD_INDEX_BITS	(PAGE_SHIFT - PMD_ENTRY_BITS)
+#define PMD_SHIFT	(PMD_INDEX_BITS + PTD_SHIFT)
+#define PMD_SIZE	(__IA64_UL(1) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
-#define PTRS_PER_PMD	(1UL << (PTRS_PER_PTD_SHIFT))
+#define PTRS_PER_PMD	(__IA64_UL(1) <<
(PMD_INDEX_BITS))
 
-#ifdef CONFIG_PGTABLE_4
 /*
- * Definitions for second level:
- *
- * PUD_SHIFT determines the size of the area a second-level
page table
- * can map.
+ * Definitions for second level (upper)
+ * By default we do not have 4 level page tables
  */
-#define PUD_SHIFT	(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
-#define PUD_SIZE	(1UL << PUD_SHIFT)
+#ifdef CONFIG_PGTABLE_4
+#define PUD_ENTRY_BITS	3
+#define PUD_INDEX_BITS	(PAGE_SHIFT - PUD_ENTRY_BITS)
+#define PUD_SHIFT	(PUD_INDEX_BITS + PMD_SHIFT)
+#define PUD_SIZE	(__IA64_UL(1) << PUD_SHIFT)
 #define PUD_MASK	(~(PUD_SIZE-1))
-#define PTRS_PER_PUD	(1UL << (PTRS_PER_PTD_SHIFT))
+#define PTRS_PER_PUD	(__IA64_UL(1) << PUD_INDEX_BITS)
 #endif
 
 /*
- * Definitions for first level:
- *
- * PGDIR_SHIFT determines what a first-level page table
entry can map.
+ * Definitions for first level (global)
  */
+#define PGD_ENTRY_BITS		3
+#define PGD_INDEX_BITS		(PAGE_SHIFT - PGD_ENTRY_BITS)
 #ifdef CONFIG_PGTABLE_4
-#define PGDIR_SHIFT		(PUD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PGDIR_SHIFT		(PGD_INDEX_BITS + PUD_SHIFT)
 #else
-#define PGDIR_SHIFT		(PMD_SHIFT + (PTRS_PER_PTD_SHIFT))
+#define PGDIR_SHIFT		(PGD_INDEX_BITS + PMD_SHIFT)
 #endif
 #define PGDIR_SIZE		(__IA64_UL(1) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD_SHIFT	PTRS_PER_PTD_SHIFT
-#define PTRS_PER_PGD		(1UL << PTRS_PER_PGD_SHIFT)
-#define USER_PTRS_PER_PGD	(5*PTRS_PER_PGD/8)	/* regions 0-4
are user regions */
+#define PTRS_PER_PGD		(__IA64_UL(1) <<
PGD_INDEX_BITS)
+/* Of the 8 regions, userspace may only map in the 5
regions 0-4 */
+#define USER_PTRS_PER_PGD	(5 * (PTRS_PER_PGD/8))
 #define FIRST_USER_ADDRESS	0
 
 /*
@@ -231,22 +249,40 @@ ia64_phys_addr_valid (unsigned long addr
 #define set_pte(ptep, pteval)	(*(ptep) = (pteval))
 #define set_pte_at(mm,addr,ptep,pteval)
set_pte(ptep,pteval)
 
+/*
+ * MAPPED_SPACE_BITS is the number of bits that make up the
maximum
+ * space we can map with our page tables
+ * i.e a page full of pgd entries fully populated
+ */
+#define MAPPED_SPACE_BITS	(PGDIR_SHIFT + PGD_INDEX_BITS)
+/* PGTABLE_MAP_LIMIT is how much space we can map with our
page tables */
+#define PGTABLE_MAP_LIMIT	(__IA64_UL(1) <<
MAPPED_SPACE_BITS)
+
+/*
+ * RGN_MAP_LIMIT is the most one region can map, thus we
remove the
+ * three region bits from the calculation.  Also remove a
guard page.
+ */
+#define RGN_MAP_LIMIT		((__IA64_UL(1) <<
(MAPPED_SPACE_BITS - 3)) - PAGE_SIZE)
+
 #define VMALLOC_START		(RGN_BASE(RGN_GATE) + 0x200000000UL)
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-# define VMALLOC_END_INIT	(RGN_BASE(RGN_GATE) + (1UL
<< (4*PAGE_SHIFT - 9)))
+/*
+ * With virtual mem_map, we reduce the vmalloc space and
place the
+ * vmem_map virtual array above the vmalloc space.  The
actual
+ * vmem_map size obviously depends on how much physical
memory we
+ * have.
+ */
+# define VMALLOC_END_INIT	(RGN_BASE(RGN_GATE) +
PGTABLE_MAP_LIMIT)
 # define VMALLOC_END		vmalloc_end
   extern unsigned long vmalloc_end;
 #else
-# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL <<
(4*PAGE_SHIFT - 9)))
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) +
PGTABLE_MAP_LIMIT)
 #endif
 
 /* fs/proc/kcore.c */
 #define	kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE))
 #define	kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE))
 
-#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT -
3)
-#define RGN_MAP_LIMIT	((1UL << RGN_MAP_SHIFT) -
PAGE_SIZE)	/* per region addr limit */
-
 /*
  * Conversion functions: convert page frame number (pfn)
and a protection value to a page
  * table entry (pte).
@@ -333,17 +369,17 @@ ia64_phys_addr_valid (unsigned long addr
  */
 #define
pgprot_writecombine(prot)	__pgprot((pgprot_val(prot) &
~_PAGE_MA_MASK) | _PAGE_MA_WC)
 
+/* The offset in the PGD directory is given by the 3 region
bits
+   (61..63) and the level-1 bits.  */
 static inline unsigned long
 pgd_index (unsigned long address)
 {
 	unsigned long region = address >> 61;
-	unsigned long l1index = (address >> PGDIR_SHIFT)
& ((PTRS_PER_PGD >> 3) - 1);
+	unsigned long index = (address >> PGDIR_SHIFT) &
((PTRS_PER_PGD >> 3) - 1);
 
-	return (region << (PAGE_SHIFT - 6)) | l1index;
+	return (region << (PGD_INDEX_BITS - 3)) | index;
 }
 
-/* The offset in the 1-level directory is given by the 3
region bits
-   (61..63) and the level-1 bits.  */
 static inline pgd_t*
 pgd_offset (struct mm_struct *mm, unsigned long address)
 {
@@ -374,7 +410,7 @@ pgd_offset (struct mm_struct *mm, unsign
  * Find an entry in the third-level page table.  This looks
more complicated than it
  * should be because some platforms place page tables in
high memory.
  */
-#define pte_index(addr)	 	(((addr) >> PAGE_SHIFT)
& (PTRS_PER_PTE - 1))
+#define pte_index(addr)	 	(((addr) >> PAGE_SHIFT)
& (PTRS_PER_PTD - 1))
 #define pte_offset_kernel(dir,addr)	((pte_t *)
pmd_page_kernel(*(dir)) + pte_index(addr))
 #define pte_offset_map(dir,addr)	pte_offset_kernel(dir,
addr)
 #define pte_offset_map_nested(dir,addr)	pte_offset_map(dir,
addr)

-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
init.c - use modified macros
user name
2006-06-21 03:17:19
Remove an un-needed extra definition of the PTE size

Signed-Off-By: Ian Wienand <ianw@gelato.unsw.edu.au>

---

 init.c |   11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

Index: linux-2.6.17/arch/ia64/mm/init.c
============================================================
=======
--- linux-2.6.17.orig/arch/ia64/mm/init.c	2006-06-18
11:49:35.000000000 +1000
+++ linux-2.6.17/arch/ia64/mm/init.c	2006-06-21
11:08:08.000000000 +1000
@@ -366,17 +366,14 @@
 	 * problem in practice.  Alternatively, we could truncate
the top of the mapped
 	 * address space to not permit mappings that would overlap
with the VMLPT.
 	 * --davidm 00/12/06
-	 */
-#	define pte_bits			3
-#	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) +
PAGE_SHIFT)
-	/*
+	 *
 	 * The virtual page table has to cover the entire
implemented address space within
 	 * a region even though not all of this space may be
mappable.  The reason for
 	 * this is that the Access bit and Dirty bit fault
handlers perform
 	 * non-speculative accesses to the virtual page table, so
the address range of the
 	 * virtual page table itself needs to be covered by
virtual page table.
 	 */
-#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
+#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT +
PTD_ENTRY_BITS)
 #	define POW2(n)			(1ULL << (n))
 
 	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask |
(7UL << 61)));
@@ -389,8 +386,8 @@
 	 * the test makes sure that our mapped space doesn't
overlap the
 	 * unimplemented hole in the middle of the region.
 	 */
-	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits -
pte_bits) ||
-	    (mapped_space_bits > impl_va_bits - 1))
+	if ((MAPPED_SPACE_BITS - PAGE_SHIFT > vmlpt_bits -
PTD_ENTRY_BITS) ||
+	    (MAPPED_SPACE_BITS > impl_va_bits - 1))
 		panic("Cannot build a big enough virtual-linear
page table"
 		      " to cover mapped address space.\n"
 		      " Try using a smaller page size.\n");
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
init.c - extract PTA setup
user name
2006-06-21 03:17:24
Kexec wants a way to enable and disable the VHPT, and it seems quite
straightforward to abstract it.  I think the reduced ia64_mmu_init is
an improvement.

Signed-Off-By: Ian Wienand <ianw@gelato.unsw.edu.au>

---

 arch/ia64/mm/init.c        |   55
++++++++++++++++++++++++++++-----------------
 include/asm-ia64/pgalloc.h |    3 ++
 2 files changed, 38 insertions(+), 20 deletions(-)

Index: linux-2.6.17/arch/ia64/mm/init.c
============================================================
=======
--- linux-2.6.17.orig/arch/ia64/mm/init.c	2006-06-21
11:10:47.000000000 +1000
+++ linux-2.6.17/arch/ia64/mm/init.c	2006-06-21
12:02:49.000000000 +1000
@@ -335,26 +335,9 @@
 	ia64_patch_gate();
 }
 
-void __devinit
-ia64_mmu_init (void *my_cpu_data)
+static void ia64_initialize_pta(int vhpt_enable)
 {
-	unsigned long psr, pta, impl_va_bits;
-	extern void __devinit tlb_init (void);
-
-#ifdef CONFIG_DISABLE_VHPT
-#	define VHPT_ENABLE_BIT	0
-#else
-#	define VHPT_ENABLE_BIT	1
-#endif
-
-	/* Pin mapping for percpu area into TLB */
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
-		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT,
PAGE_KERNEL)),
-		 PERCPU_PAGE_SHIFT);
-
-	ia64_set_psr(psr);
-	ia64_srlz_i();
+	unsigned long pta, impl_va_bits;
 
 	/*
 	 * Check if the virtually mapped linear page table (VMLPT)
overlaps with a mapped
@@ -402,8 +385,40 @@
 	 * size of the table, and bit 0 whether the VHPT walker is
 	 * enabled.
 	 */
-	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits <<
2) | VHPT_ENABLE_BIT);
+	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits <<
2) | vhpt_enable);
+}
+
+void ia64_enable_vhpt(void)
+{
+	ia64_initialize_pta(1);
+}
 
+void ia64_disable_vhpt(void)
+{
+	ia64_initialize_pta(0);
+}
+
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+	unsigned long psr;
+	extern void __devinit tlb_init (void);
+
+	/* Pin mapping for percpu area into TLB */
+	psr = ia64_clear_ic();
+	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
+		 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT,
PAGE_KERNEL)),
+		 PERCPU_PAGE_SHIFT);
+
+	ia64_set_psr(psr);
+	ia64_srlz_i();
+
+#ifdef CONFIG_DISABLE_VHPT
+	printk(KERN_INFO "VHPT walker disabled\n");
+	ia64_disable_vhpt();
+#else
+	ia64_enable_vhpt();
+#endif
 	ia64_tlb_init();
 
 #ifdef	CONFIG_HUGETLB_PAGE
Index: linux-2.6.17/include/asm-ia64/pgalloc.h
============================================================
=======
--- linux-2.6.17.orig/include/asm-ia64/pgalloc.h	2006-06-18
11:49:35.000000000 +1000
+++ linux-2.6.17/include/asm-ia64/pgalloc.h	2006-06-21
11:35:41.000000000 +1000
@@ -161,4 +161,7 @@
 
 extern void check_pgt_cache(void);
 
+void ia64_enable_vhpt(void);
+void ia64_disable_vhpt(void);
+
 #endif				/* _ASM_IA64_PGALLOC_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
init.c - use modified macros
user name
2006-06-21 08:05:35
In article <20060621031719.22750.17417.sendpatchsetwagner.orchestra.cse.unsw.EDU.AU> you wrote:
> Remove an un-needed extra definition of the PTE size

Here is a patch relative to yours that makes the surrounding comments
< 80 columns wide.

-- 
Horms                                           http://www.vergenet.net/~horms/

ia64, mm: Make VMLPT comment < 80 characters wide

Signed-off-by: Horms <horms@verge.net.au>
---
 arch/ia64/mm/init.c |   30 +++++++++++++++++-------------
 1 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index df46f15..2bfdaa0 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -340,22 +340,26 @@ static void ia64_initialize_pta(int vhpt
 	unsigned long pta, impl_va_bits;
 
 	/*
-	 * Check if the virtually mapped linear page table (VMLPT)
overlaps with a mapped
-	 * address space.  The IA-64 architecture guarantees that
at least 50 bits of
-	 * virtual address space are implemented but if we pick a
large enough page size
-	 * (e.g., 64KB), the mapped address space is big enough
that it will overlap with
-	 * VMLPT.  I assume that once we run on machines big
enough to warrant 64KB pages,
-	 * IMPL_VA_MSB will be significantly bigger, so this is
unlikely to become a
-	 * problem in practice.  Alternatively, we could truncate
the top of the mapped
-	 * address space to not permit mappings that would overlap
with the VMLPT.
+	 * Check if the virtually mapped linear page table (VMLPT)
overlaps
+	 * with a mapped address space.  The IA-64 architecture
guarantees
+	 * that at least 50 bits of virtual address space are
implemented
+	 * but if we pick a large enough page size (e.g., 64KB),
the mapped
+	 * address space is big enough that it will overlap with
VMLPT.  I
+	 * assume that once we run on machines big enough to
warrant 64KB
+	 * pages, IMPL_VA_MSB will be significantly bigger, so
this is
+	 * unlikely to become a problem in practice. 
Alternatively, we
+	 * could truncate the top of the mapped address space to
not permit
+	 * mappings that would overlap with the VMLPT.
 	 * --davidm 00/12/06
 	 *
-	 * The virtual page table has to cover the entire
implemented address space within
-	 * a region even though not all of this space may be
mappable.  The reason for
-	 * this is that the Access bit and Dirty bit fault
handlers perform
-	 * non-speculative accesses to the virtual page table, so
the address range of the
-	 * virtual page table itself needs to be covered by
virtual page table.
+	 * The virtual page table has to cover the entire
implemented
+	 * address space within a region even though not all of
this space
+	 * may be mappable.  The reason for this is that the
Access bit and
+	 * Dirty bit fault handlers perform non-speculative
accesses to the
+	 * virtual page table, so the address range of the virtual
page
+	 * table itself needs to be covered by virtual page table.
 	 */
+
 #	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT +
PTD_ENTRY_BITS)
 #	define POW2(n)			(1ULL << (n))
 
-- 
1.4.0.gd281


-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
[1-5]

about | contact  Other archives ( Real Estate discussion Medical topics )