List Info

Thread: ia64: race flushing icache in COW path




ia64: race flushing icache in COW path
user name
2006-07-13 20:37:17
> lazy_mmu_prot_update() is used in a number of other
places *after* the pte 
> is established. An explanation as to why this case is
different, would be 
> interesting.

The other places do need a close look, it seems that some of
them may not be needed (e.g. the one inside "if
(reuse) { }" at
the top of do_wp_page() ... at the moment I'm struggling to
see
what it manages to achieve).

Most of the rest are in cases where we are adding a new
virtual
page (comments like "No need to invalidate - it was
non-present
before").  These may also need to have the order
shuffled, but
they seem unlikely to be the cause of a bug (it is unlikely
that an application has threads that branch to new anonymous
pages as they are being attached to the process).

So you are right that there may be some more work here, but
I wanted to get the one-liner that is a clear and obvious
bugfix posted without being cluttered with some less obvious
fixes.

-Tony
-
To unsubscribe from this list: send the line
"unsubscribe linux-ia64" in
the body of a message to majordomovger.kernel.org
More majordomo info at  http://vge
r.kernel.org/majordomo-info.html
ia64: race flushing icache in COW path
user name
2006-07-14 03:11:47
On Thu, 2006-07-13 at 13:37 -0700, Luck, Tony wrote:
> > lazy_mmu_prot_update() is used in a number of
other places *after* the pte 
> > is established. An explanation as to why this case
is different, would be 
> > interesting.
> 
> The other places do need a close look, it seems that
some of
> them may not be needed (e.g. the one inside "if
(reuse) { }" at
> the top of do_wp_page() ... at the moment I'm
struggling to see
> what it manages to achieve).
> 
> Most of the rest are in cases where we are adding a new
virtual
> page (comments like "No need to invalidate - it
was non-present
> before").  These may also need to have the order
shuffled, but
> they seem unlikely to be the cause of a bug (it is
unlikely
> that an application has threads that branch to new
anonymous
> pages as they are being attached to the process).
> 
> So you are right that there may be some more work here,
but
> I wanted to get the one-liner that is a clear and
obvious
> bugfix posted without being cluttered with some less
obvious
> fixes.

How about something like this.
---

From: Peter Zijlstra <a.p.zijlstrachello.nl>

Since calling of lazy_mmu_prot_update() while the PTE is
already set
could open up a race window wrt. icache coherency, make sure
all
invocations are done before setting the PTE.

These call-sites are currently not problematic, the one that
was has 
been addressed by a previous patch. This patch just tidies
up the
call semantics for lazy_mmu_prot_update().

Signed-Off-By: Peter Zijlstra <a.p.zijlstrachello.nl>
---
 mm/hugetlb.c  |    4 ++--
 mm/memory.c   |   10 +++++-----
 mm/mprotect.c |    2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

Index: linux-2.6/mm/hugetlb.c
============================================================
=======
--- linux-2.6.orig/mm/hugetlb.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/hugetlb.c	2006-07-14 05:04:58.000000000
+0200
 -313,9
+313,9  static void set_huge_ptep_writable(struc
 	pte_t entry;
 
 	entry = pte_mkwrite(pte_mkdirty(*ptep));
+	lazy_mmu_prot_update(entry);
 	ptep_set_access_flags(vma, address, ptep, entry, 1);
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 }
 
 
 -634,8
+634,8  void hugetlb_change_protection(struct vm
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
-			set_huge_pte_at(mm, address, ptep, pte);
 			lazy_mmu_prot_update(pte);
+			set_huge_pte_at(mm, address, ptep, pte);
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
Index: linux-2.6/mm/memory.c
============================================================
=======
--- linux-2.6.orig/mm/memory.c	2006-07-14 05:00:06.000000000
+0200
+++ linux-2.6/mm/memory.c	2006-07-14 05:02:07.000000000
+0200
 -1506,9
+1506,9  static int do_wp_page(struct mm_struct *
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, page_table, entry,
1);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
 -1980,6
+1980,7  static int do_swap_page(struct mm_struct
 	}
 
 	flush_icache_page(vma, page);
+	lazy_mmu_prot_update(pte);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
 -1997,7
+1998,6  static int do_swap_page(struct mm_struct
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
 -2055,11
+2055,11  static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	lazy_mmu_prot_update(entry);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
 -2180,6
+2180,7  retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 -2197,7
+2198,6  retry:
 
 	/* no need to invalidate: a not-present page shouldn't be
cached */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 -2293,9
+2293,9  static inline int handle_pte_fault(struc
 	}
 	entry = pte_mkyoung(entry);
 	if (!pte_same(old_entry, entry)) {
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, pte, entry,
write_access);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch
code
Index: linux-2.6/mm/mprotect.c
============================================================
=======
--- linux-2.6.orig/mm/mprotect.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/mprotect.c	2006-07-14 05:03:37.000000000
+0200
 -43,8
+43,8  static void change_pte_range(struct mm_s
 			 * into place.
 			 */
 			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte),
newprot);
-			set_pte_at(mm, addr, pte, ptent);
 			lazy_mmu_prot_update(ptent);
+			set_pte_at(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);


-
To unsubscribe from this list: send the line
"unsubscribe linux-ia64" in
the body of a message to majordomovger.kernel.org
More majordomo info at  http://vge
r.kernel.org/majordomo-info.html
ia64: race flushing icache in COW path
user name
2006-07-14 03:11:47
On Thu, 2006-07-13 at 13:37 -0700, Luck, Tony wrote:
> > lazy_mmu_prot_update() is used in a number of
other places *after* the pte 
> > is established. An explanation as to why this case
is different, would be 
> > interesting.
> 
> The other places do need a close look, it seems that
some of
> them may not be needed (e.g. the one inside "if
(reuse) { }" at
> the top of do_wp_page() ... at the moment I'm
struggling to see
> what it manages to achieve).
> 
> Most of the rest are in cases where we are adding a new
virtual
> page (comments like "No need to invalidate - it
was non-present
> before").  These may also need to have the order
shuffled, but
> they seem unlikely to be the cause of a bug (it is
unlikely
> that an application has threads that branch to new
anonymous
> pages as they are being attached to the process).
> 
> So you are right that there may be some more work here,
but
> I wanted to get the one-liner that is a clear and
obvious
> bugfix posted without being cluttered with some less
obvious
> fixes.

How about something like this.
---

From: Peter Zijlstra <a.p.zijlstrachello.nl>

Since calling of lazy_mmu_prot_update() while the PTE is
already set
could open up a race window wrt. icache coherency, make sure
all
invocations are done before setting the PTE.

These call-sites are currently not problematic, the one that
was has 
been addressed by a previous patch. This patch just tidies
up the
call semantics for lazy_mmu_prot_update().

Signed-Off-By: Peter Zijlstra <a.p.zijlstrachello.nl>
---
 mm/hugetlb.c  |    4 ++--
 mm/memory.c   |   10 +++++-----
 mm/mprotect.c |    2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

Index: linux-2.6/mm/hugetlb.c
============================================================
=======
--- linux-2.6.orig/mm/hugetlb.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/hugetlb.c	2006-07-14 05:04:58.000000000
+0200
 -313,9
+313,9  static void set_huge_ptep_writable(struc
 	pte_t entry;
 
 	entry = pte_mkwrite(pte_mkdirty(*ptep));
+	lazy_mmu_prot_update(entry);
 	ptep_set_access_flags(vma, address, ptep, entry, 1);
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 }
 
 
 -634,8
+634,8  void hugetlb_change_protection(struct vm
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
-			set_huge_pte_at(mm, address, ptep, pte);
 			lazy_mmu_prot_update(pte);
+			set_huge_pte_at(mm, address, ptep, pte);
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
Index: linux-2.6/mm/memory.c
============================================================
=======
--- linux-2.6.orig/mm/memory.c	2006-07-14 05:00:06.000000000
+0200
+++ linux-2.6/mm/memory.c	2006-07-14 05:02:07.000000000
+0200
 -1506,9
+1506,9  static int do_wp_page(struct mm_struct *
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, page_table, entry,
1);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
 -1980,6
+1980,7  static int do_swap_page(struct mm_struct
 	}
 
 	flush_icache_page(vma, page);
+	lazy_mmu_prot_update(pte);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
 -1997,7
+1998,6  static int do_swap_page(struct mm_struct
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
 -2055,11
+2055,11  static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	lazy_mmu_prot_update(entry);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
 -2180,6
+2180,7  retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 -2197,7
+2198,6  retry:
 
 	/* no need to invalidate: a not-present page shouldn't be
cached */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 -2293,9
+2293,9  static inline int handle_pte_fault(struc
 	}
 	entry = pte_mkyoung(entry);
 	if (!pte_same(old_entry, entry)) {
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, pte, entry,
write_access);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch
code
Index: linux-2.6/mm/mprotect.c
============================================================
=======
--- linux-2.6.orig/mm/mprotect.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/mprotect.c	2006-07-14 05:03:37.000000000
+0200
 -43,8
+43,8  static void change_pte_range(struct mm_s
 			 * into place.
 			 */
 			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte),
newprot);
-			set_pte_at(mm, addr, pte, ptent);
 			lazy_mmu_prot_update(ptent);
+			set_pte_at(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);


-
To unsubscribe from this list: send the line
"unsubscribe linux-ia64" in
the body of a message to majordomovger.kernel.org
More majordomo info at  http://vge
r.kernel.org/majordomo-info.html
ia64: race flushing icache in COW path
user name
2006-07-14 03:11:47
On Thu, 2006-07-13 at 13:37 -0700, Luck, Tony wrote:
> > lazy_mmu_prot_update() is used in a number of
other places *after* the pte 
> > is established. An explanation as to why this case
is different, would be 
> > interesting.
> 
> The other places do need a close look, it seems that
some of
> them may not be needed (e.g. the one inside "if
(reuse) { }" at
> the top of do_wp_page() ... at the moment I'm
struggling to see
> what it manages to achieve).
> 
> Most of the rest are in cases where we are adding a new
virtual
> page (comments like "No need to invalidate - it
was non-present
> before").  These may also need to have the order
shuffled, but
> they seem unlikely to be the cause of a bug (it is
unlikely
> that an application has threads that branch to new
anonymous
> pages as they are being attached to the process).
> 
> So you are right that there may be some more work here,
but
> I wanted to get the one-liner that is a clear and
obvious
> bugfix posted without being cluttered with some less
obvious
> fixes.

How about something like this.
---

From: Peter Zijlstra <a.p.zijlstrachello.nl>

Since calling of lazy_mmu_prot_update() while the PTE is
already set
could open up a race window wrt. icache coherency, make sure
all
invocations are done before setting the PTE.

These call-sites are currently not problematic, the one that
was has 
been addressed by a previous patch. This patch just tidies
up the
call semantics for lazy_mmu_prot_update().

Signed-Off-By: Peter Zijlstra <a.p.zijlstrachello.nl>
---
 mm/hugetlb.c  |    4 ++--
 mm/memory.c   |   10 +++++-----
 mm/mprotect.c |    2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

Index: linux-2.6/mm/hugetlb.c
============================================================
=======
--- linux-2.6.orig/mm/hugetlb.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/hugetlb.c	2006-07-14 05:04:58.000000000
+0200
 -313,9
+313,9  static void set_huge_ptep_writable(struc
 	pte_t entry;
 
 	entry = pte_mkwrite(pte_mkdirty(*ptep));
+	lazy_mmu_prot_update(entry);
 	ptep_set_access_flags(vma, address, ptep, entry, 1);
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 }
 
 
 -634,8
+634,8  void hugetlb_change_protection(struct vm
 		if (!pte_none(*ptep)) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
-			set_huge_pte_at(mm, address, ptep, pte);
 			lazy_mmu_prot_update(pte);
+			set_huge_pte_at(mm, address, ptep, pte);
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
Index: linux-2.6/mm/memory.c
============================================================
=======
--- linux-2.6.orig/mm/memory.c	2006-07-14 05:00:06.000000000
+0200
+++ linux-2.6/mm/memory.c	2006-07-14 05:02:07.000000000
+0200
 -1506,9
+1506,9  static int do_wp_page(struct mm_struct *
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, page_table, entry,
1);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
 -1980,6
+1980,7  static int do_swap_page(struct mm_struct
 	}
 
 	flush_icache_page(vma, page);
+	lazy_mmu_prot_update(pte);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
 
 -1997,7
+1998,6  static int do_swap_page(struct mm_struct
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
-	lazy_mmu_prot_update(pte);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
 -2055,11
+2055,11  static int do_anonymous_page(struct mm_s
 		page_add_file_rmap(page);
 	}
 
+	lazy_mmu_prot_update(entry);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return VM_FAULT_MINOR;
 -2180,6
+2180,7  retry:
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		lazy_mmu_prot_update(entry);
 		set_pte_at(mm, address, page_table, entry);
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 -2197,7
+2198,6  retry:
 
 	/* no need to invalidate: a not-present page shouldn't be
cached */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 -2293,9
+2293,9  static inline int handle_pte_fault(struc
 	}
 	entry = pte_mkyoung(entry);
 	if (!pte_same(old_entry, entry)) {
+		lazy_mmu_prot_update(entry);
 		ptep_set_access_flags(vma, address, pte, entry,
write_access);
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch
code
Index: linux-2.6/mm/mprotect.c
============================================================
=======
--- linux-2.6.orig/mm/mprotect.c	2006-07-14
04:54:48.000000000 +0200
+++ linux-2.6/mm/mprotect.c	2006-07-14 05:03:37.000000000
+0200
 -43,8
+43,8  static void change_pte_range(struct mm_s
 			 * into place.
 			 */
 			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte),
newprot);
-			set_pte_at(mm, addr, pte, ptent);
 			lazy_mmu_prot_update(ptent);
+			set_pte_at(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);


-
To unsubscribe from this list: send the line
"unsubscribe linux-ia64" in
the body of a message to majordomovger.kernel.org
More majordomo info at  http://vge
r.kernel.org/majordomo-info.html
[1-4]

about | contact  Other archives ( Real Estate discussion Medical topics )