List Info

Thread: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)




seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
user name
2008-04-29 17:24:58
Yeah, the amd64 kernel reuses the same gdt to setup all
cpus, causing
kqemu to end up restoring the interrupt stackpointer (after
running
guest code using its own cpu state) from the tss of the last
cpu,
regardless which cpu it happened to run on.  And that then
causes the last
cpu's (usually) idle thread's stack to get smashed and the
host doing
multiple panics...  (Which also explains why pinning qemu
onto cpu 1
worked on a 2-way host.)

 Here's the patch I just tested, of course you'd want to
disable this
once the gdt is no longer shared, so assuming someone wants
to fix this,
please also do an OSVERSION bump...

Index: kqemu-freebsd.c
 -34,6
+34,11 
 
 #include <machine/vmparam.h>
 #include <machine/stdarg.h>
+#ifdef __x86_64__
+#include <sys/pcpu.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+#endif
 
 #include "kqemu-kernel.h"
 
 -264,6
+269,19 
     va_end(ap);
 }
 
+#ifdef __x86_64__
+/* called with interrupts disabled */
+void CDECL kqemu_tss_workaround(void)
+{
+    int gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+
+    gdt_segs[GPROC0_SEL].ssd_base = (long)
&common_tss[PCPU_GET(cpuid)];
+    ssdtosyssd(&gdt_segs[GPROC0_SEL],
+       (struct system_segment_descriptor
*)&gdt[GPROC0_SEL]);
+    ltr(gsel_tss);
+}
+#endif
+
 struct kqemu_instance { 
 #if __FreeBSD_version >= 500000
     TAILQ_ENTRY(kqemu_instance) kqemu_ent;
Index: common/kernel.c
 -1030,6
+1030,9 
 #ifdef __x86_64__
     uint16_t saved_ds, saved_es;
     unsigned long fs_base, gs_base;
+#ifdef __FreeBSD__
+    struct kqemu_global_state *g = s->global_state;
+#endif
 #endif
     
 #ifdef PROFILE
 -1197,6
+1200,13 
             apic_restore_nmi(s, apic_nmi_mask);
         }
         profile_record(s);
+#ifdef __FreeBSD__
+#ifdef __x86_64__
+        spin_lock(&g->lock);
+        kqemu_tss_workaround();
+        spin_unlock(&g->lock);
+#endif
+#endif
 
         if (s->mon_req == MON_REQ_IRQ) {
             struct kqemu_exception_regs *r;
Index: kqemu-kernel.h
 -44,4
+44,10 
 
 void CDECL kqemu_log(const char *fmt, ...);
 
+#ifdef __FreeBSD__
+#ifdef __x86_64__
+void CDECL kqemu_tss_workaround(void);
+#endif
+#endif
+
 #endif /* KQEMU_KERNEL_H */
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
user name
2008-05-01 05:19:51
On Wed, Apr 30, 2008 at 12:24:58AM +0200, Juergen Lock
wrote:
> Yeah, the amd64 kernel reuses the same gdt to setup all
cpus, causing
> kqemu to end up restoring the interrupt stackpointer
(after running
> guest code using its own cpu state) from the tss of the
last cpu,
> regardless which cpu it happened to run on.  And that
then causes the last
> cpu's (usually) idle thread's stack to get smashed and
the host doing
> multiple panics...  (Which also explains why pinning
qemu onto cpu 1
> worked on a 2-way host.)
> 
Hmm maybe the following is a little more clear:  kqemu sets
up its own
cpu state and has to save and restore the original state
because of that,
so among other things it does an str insn (store task
register), and later
an ltr insn (load task register) using the value it got from
the first
str insn.  That ltr insn loads the selector for the tss
which is stored
in the gdt, and that entry in the gdt is different for each
cpu, but since
a single gdt was reused to setup the cpus at boot (in
init_secondary() in
/sys/amd64/amd64/mp_machdep.c), it still points to the tss
for the last
cpu, instead of to the right one for the cpu the ltr insn
gets executed on.
That is what the kqemu_tss_workaround() in the patch
`fixes'...

>  Here's the patch I just tested, of course you'd want
to disable this
> once the gdt is no longer shared, so assuming someone
wants to fix this,
> please also do an OSVERSION bump...

 The patch applied with offsets (I still had debug code in
when I made it),
here is a rebased version:

Index: kqemu-freebsd.c
 -33,6
+33,11 
 
 #include <machine/vmparam.h>
 #include <machine/stdarg.h>
+#ifdef __x86_64__
+#include <sys/pcpu.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+#endif
 
 #include "kqemu-kernel.h"
 
 -234,6
+239,19 
     va_end(ap);
 }
 
+#ifdef __x86_64__
+/* called with interrupts disabled */
+void CDECL kqemu_tss_workaround(void)
+{
+    int gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+
+    gdt_segs[GPROC0_SEL].ssd_base = (long)
&common_tss[PCPU_GET(cpuid)];
+    ssdtosyssd(&gdt_segs[GPROC0_SEL],
+       (struct system_segment_descriptor
*)&gdt[GPROC0_SEL]);
+    ltr(gsel_tss);
+}
+#endif
+
 struct kqemu_instance { 
 #if __FreeBSD_version >= 500000
     TAILQ_ENTRY(kqemu_instance) kqemu_ent;
Index: common/kernel.c
 -1025,6
+1025,9 
 #ifdef __x86_64__
     uint16_t saved_ds, saved_es;
     unsigned long fs_base, gs_base;
+#ifdef __FreeBSD__
+    struct kqemu_global_state *g = s->global_state;
+#endif
 #endif
     
 #ifdef PROFILE
 -1188,6
+1191,13 
             apic_restore_nmi(s, apic_nmi_mask);
         }
         profile_record(s);
+#ifdef __FreeBSD__
+#ifdef __x86_64__
+        spin_lock(&g->lock);
+        kqemu_tss_workaround();
+        spin_unlock(&g->lock);
+#endif
+#endif
 
         if (s->mon_req == MON_REQ_IRQ) {
             struct kqemu_exception_regs *r;
Index: kqemu-kernel.h
 -44,4
+44,10 
 
 void CDECL kqemu_log(const char *fmt, ...);
 
+#ifdef __FreeBSD__
+#ifdef __x86_64__
+void CDECL kqemu_tss_workaround(void);
+#endif
+#endif
+
 #endif /* KQEMU_KERNEL_H */
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
country flaguser name
United States
2008-05-01 09:11:06
On Thursday 01 May 2008 06:19:51 am Juergen Lock wrote:
> On Wed, Apr 30, 2008 at 12:24:58AM +0200, Juergen Lock
wrote:
> > Yeah, the amd64 kernel reuses the same gdt to
setup all cpus, causing
> > kqemu to end up restoring the interrupt
stackpointer (after running
> > guest code using its own cpu state) from the tss
of the last cpu,
> > regardless which cpu it happened to run on.  And
that then causes the
> > last cpu's (usually) idle thread's stack to get
smashed and the host
> > doing multiple panics...  (Which also explains why
pinning qemu onto cpu
> > 1 worked on a 2-way host.)
>
> Hmm maybe the following is a little more clear:  kqemu
sets up its own
> cpu state and has to save and restore the original
state because of that,
> so among other things it does an str insn (store task
register), and later
> an ltr insn (load task register) using the value it got
from the first
> str insn.  That ltr insn loads the selector for the tss
which is stored
> in the gdt, and that entry in the gdt is different for
each cpu, but since
> a single gdt was reused to setup the cpus at boot (in
init_secondary() in
> /sys/amd64/amd64/mp_machdep.c), it still points to the
tss for the last
> cpu, instead of to the right one for the cpu the ltr
insn gets executed on.
> That is what the kqemu_tss_workaround() in the patch
`fixes'...

Perhaps kqemu shouldn't be doing str/ltr on amd64 instead? 
The things i386 
uses a separate tss for in the kernel (separate stack for
double faults) is 
handled differently on amd64 (on amd64 we make the double
fault handler use 
one of the IST stacks).

> >  Here's the patch I just tested, of course you'd
want to disable this
> > once the gdt is no longer shared, so assuming
someone wants to fix this,
> > please also do an OSVERSION bump...
>
>  The patch applied with offsets (I still had debug code
in when I made it),
> here is a rebased version:
>
> Index: kqemu-freebsd.c
>  -33,6 +33,11 
>
>  #include <machine/vmparam.h>
>  #include <machine/stdarg.h>
> +#ifdef __x86_64__
> +#include <sys/pcpu.h>
> +#include <machine/segments.h>
> +#include <machine/tss.h>
> +#endif
>
>  #include "kqemu-kernel.h"
>
>  -234,6 +239,19 
>      va_end(ap);
>  }
>
> +#ifdef __x86_64__
> +/* called with interrupts disabled */
> +void CDECL kqemu_tss_workaround(void)
> +{
> +    int gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
> +
> +    gdt_segs[GPROC0_SEL].ssd_base = (long)
&common_tss[PCPU_GET(cpuid)];
> +    ssdtosyssd(&gdt_segs[GPROC0_SEL],
> +       (struct system_segment_descriptor
*)&gdt[GPROC0_SEL]);
> +    ltr(gsel_tss);
> +}
> +#endif
> +
>  struct kqemu_instance {
>  #if __FreeBSD_version >= 500000
>      TAILQ_ENTRY(kqemu_instance) kqemu_ent;
> Index: common/kernel.c
>  -1025,6 +1025,9 
>  #ifdef __x86_64__
>      uint16_t saved_ds, saved_es;
>      unsigned long fs_base, gs_base;
> +#ifdef __FreeBSD__
> +    struct kqemu_global_state *g =
s->global_state;
> +#endif
>  #endif
>
>  #ifdef PROFILE
>  -1188,6 +1191,13 
>              apic_restore_nmi(s, apic_nmi_mask);
>          }
>          profile_record(s);
> +#ifdef __FreeBSD__
> +#ifdef __x86_64__
> +        spin_lock(&g->lock);
> +        kqemu_tss_workaround();
> +        spin_unlock(&g->lock);
> +#endif
> +#endif
>
>          if (s->mon_req == MON_REQ_IRQ) {
>              struct kqemu_exception_regs *r;
> Index: kqemu-kernel.h
>  -44,4 +44,10 
>
>  void CDECL kqemu_log(const char *fmt, ...);
>
> +#ifdef __FreeBSD__
> +#ifdef __x86_64__
> +void CDECL kqemu_tss_workaround(void);
> +#endif
> +#endif
> +
>  #endif /* KQEMU_KERNEL_H */
> _______________________________________________
> freebsd-amd64freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
> To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"



-- 
John Baldwin
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
user name
2008-05-01 10:53:04
On Thu, May 01, 2008 at 10:11:06AM -0400, John Baldwin
wrote:
> On Thursday 01 May 2008 06:19:51 am Juergen Lock
wrote:
> > On Wed, Apr 30, 2008 at 12:24:58AM +0200, Juergen
Lock wrote:
> > > Yeah, the amd64 kernel reuses the same gdt to
setup all cpus, causing
> > > kqemu to end up restoring the interrupt
stackpointer (after running
> > > guest code using its own cpu state) from the
tss of the last cpu,
> > > regardless which cpu it happened to run on. 
And that then causes the
> > > last cpu's (usually) idle thread's stack to
get smashed and the host
> > > doing multiple panics...  (Which also
explains why pinning qemu onto cpu
> > > 1 worked on a 2-way host.)
> >
> > Hmm maybe the following is a little more clear: 
kqemu sets up its own
> > cpu state and has to save and restore the original
state because of that,
> > so among other things it does an str insn (store
task register), and later
> > an ltr insn (load task register) using the value
it got from the first
> > str insn.  That ltr insn loads the selector for
the tss which is stored
> > in the gdt, and that entry in the gdt is different
for each cpu, but since
> > a single gdt was reused to setup the cpus at boot
(in init_secondary() in
> > /sys/amd64/amd64/mp_machdep.c), it still points to
the tss for the last
> > cpu, instead of to the right one for the cpu the
ltr insn gets executed on.
> > That is what the kqemu_tss_workaround() in the
patch `fixes'...
> 
> Perhaps kqemu shouldn't be doing str/ltr on amd64
instead?  The things i386 
> uses a separate tss for in the kernel (separate stack
for double faults) is 
> handled differently on amd64 (on amd64 we make the
double fault handler use 
> one of the IST stacks).

Well, kqemu uses its own gdt, tss and everything while
running guest code
in its monitor, so it kinda has to do the str/ltr.s to setup
its stuff, run
guest code, and then restore the original state of things. 
(And `restore
original state of things' is what failed here.)

 Oh and also the tss does seem to be used for the interrupt
stack on
amd64 too, at least thats the one that ended up wrong and
caused the panics
I saw...

	Juergen
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
country flaguser name
United States
2008-05-01 12:35:06
On Thursday 01 May 2008 11:53:04 am Juergen Lock wrote:
> On Thu, May 01, 2008 at 10:11:06AM -0400, John Baldwin
wrote:
> > On Thursday 01 May 2008 06:19:51 am Juergen Lock
wrote:
> > > On Wed, Apr 30, 2008 at 12:24:58AM +0200,
Juergen Lock wrote:
> > > > Yeah, the amd64 kernel reuses the same
gdt to setup all cpus, causing
> > > > kqemu to end up restoring the interrupt
stackpointer (after running
> > > > guest code using its own cpu state) from
the tss of the last cpu,
> > > > regardless which cpu it happened to run
on.  And that then causes the
> > > > last cpu's (usually) idle thread's stack
to get smashed and the host
> > > > doing multiple panics...  (Which also
explains why pinning qemu onto 
cpu
> > > > 1 worked on a 2-way host.)
> > >
> > > Hmm maybe the following is a little more
clear:  kqemu sets up its own
> > > cpu state and has to save and restore the
original state because of 
that,
> > > so among other things it does an str insn
(store task register), and 
later
> > > an ltr insn (load task register) using the
value it got from the first
> > > str insn.  That ltr insn loads the selector
for the tss which is stored
> > > in the gdt, and that entry in the gdt is
different for each cpu, but 
since
> > > a single gdt was reused to setup the cpus at
boot (in init_secondary() 
in
> > > /sys/amd64/amd64/mp_machdep.c), it still
points to the tss for the last
> > > cpu, instead of to the right one for the cpu
the ltr insn gets executed 
on.
> > > That is what the kqemu_tss_workaround() in
the patch `fixes'...
> > 
> > Perhaps kqemu shouldn't be doing str/ltr on amd64
instead?  The things 
i386 
> > uses a separate tss for in the kernel (separate
stack for double faults) 
is 
> > handled differently on amd64 (on amd64 we make the
double fault handler 
use 
> > one of the IST stacks).
> 
> Well, kqemu uses its own gdt, tss and everything while
running guest code
> in its monitor, so it kinda has to do the str/ltr.s to
setup its stuff, run
> guest code, and then restore the original state of
things.  (And `restore
> original state of things' is what failed here.)
> 
>  Oh and also the tss does seem to be used for the
interrupt stack on
> amd64 too, at least thats the one that ended up wrong
and caused the panics
> I saw...

The single TSS holds the IST pointers.  On i386 we use a
separate TSS for 
double faults, but on amd64 a double fault uses the same TSS
but uses the IST 
pointers from that same TSS.  The TSS also holds the ring
stack pointer for 
when syscalls, interrupts, and traps from userland cross
from ring 3 to ring 
0 which is probably why you got a panic.

Because of the fact that amd64 in normal operation never
changes the task 
register (and that the gdt isn't used quite the same either,
all the per-cpu 
stuff is via FSBASE and GSBASE) I don't expect the kernel to
change to use a 
per-cpu gdt or the like.  I think you will need to use the
current approach 
of patching kqemu to fixup the tss/gdt when reloading the
task register.  You 
might want to make it a regular part of the code rather than
a workaround as 
a result.

-- 
John Baldwin
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
country flaguser name
United States
2008-05-05 08:50:57
On Saturday 03 May 2008 09:11:39 am Juergen Lock wrote:
> On Thu, May 01, 2008 at 01:35:06PM -0400, John Baldwin
wrote:
> > On Thursday 01 May 2008 11:53:04 am Juergen Lock
wrote:
> > > On Thu, May 01, 2008 at 10:11:06AM -0400,
John Baldwin wrote:
> > > > On Thursday 01 May 2008 06:19:51 am
Juergen Lock wrote:
> > > > > On Wed, Apr 30, 2008 at 12:24:58AM
+0200, Juergen Lock wrote:
> > > > > > Yeah, the amd64 kernel reuses
the same gdt to setup all cpus,
> > > > > > causing kqemu to end up
restoring the interrupt stackpointer
> > > > > > (after running guest code
using its own cpu state) from the tss
> > > > > > of the last cpu, regardless
which cpu it happened to run on.  And
> > > > > > that then causes the last
cpu's (usually) idle thread's stack to
> > > > > > get smashed and the host doing
multiple panics...  (Which also
> > > > > > explains why pinning qemu
onto
> >
> > cpu
> >
> > > > > > 1 worked on a 2-way host.)
> > > > >
> > > > > Hmm maybe the following is a little
more clear:  kqemu sets up its
> > > > > own cpu state and has to save and
restore the original state
> > > > > because of
> >
> > that,
> >
> > > > > so among other things it does an
str insn (store task register),
> > > > > and
> >
> > later
> >
> > > > > an ltr insn (load task register)
using the value it got from the
> > > > > first str insn.  That ltr insn
loads the selector for the tss which
> > > > > is stored in the gdt, and that
entry in the gdt is different for
> > > > > each cpu, but
> >
> > since
> >
> > > > > a single gdt was reused to setup
the cpus at boot (in
> > > > > init_secondary()
> >
> > in
> >
> > > > > /sys/amd64/amd64/mp_machdep.c), it
still points to the tss for the
> > > > > last cpu, instead of to the right
one for the cpu the ltr insn gets
> > > > > executed
> >
> > on.
> >
> > > > > That is what the
kqemu_tss_workaround() in the patch `fixes'...
> > > >
> > > > Perhaps kqemu shouldn't be doing str/ltr
on amd64 instead?  The
> > > > things
> >
> > i386
> >
> > > > uses a separate tss for in the kernel
(separate stack for double
> > > > faults)
> >
> > is
> >
> > > > handled differently on amd64 (on amd64
we make the double fault
> > > > handler
> >
> > use
> >
> > > > one of the IST stacks).
> > >
> > > Well, kqemu uses its own gdt, tss and
everything while running guest
> > > code in its monitor, so it kinda has to do
the str/ltr.s to setup its
> > > stuff, run guest code, and then restore the
original state of things. 
> > > (And `restore original state of things' is
what failed here.)
> > >
> > >  Oh and also the tss does seem to be used for
the interrupt stack on
> > > amd64 too, at least thats the one that ended
up wrong and caused the
> > > panics I saw...
> >
> > The single TSS holds the IST pointers.  On i386 we
use a separate TSS for
> > double faults, but on amd64 a double fault uses
the same TSS but uses the
> > IST pointers from that same TSS.  The TSS also
holds the ring stack
> > pointer for when syscalls, interrupts, and traps
from userland cross from
> > ring 3 to ring 0 which is probably why you got a
panic.
>
> Yeah thats where it happened.
>
> > Because of the fact that amd64 in normal operation
never changes the task
> > register (and that the gdt isn't used quite the
same either, all the
> > per-cpu stuff is via FSBASE and GSBASE) I don't
expect the kernel to
> > change to use a per-cpu gdt or the like.  I think
you will need to use
> > the current approach of patching kqemu to fixup
the tss/gdt when
> > reloading the task register.  You might want to
make it a regular part of
> > the code rather than a workaround as a result.
>
>  Hmm okay, how would you call it then,
kqemu_tss_fixup?

Sure.

-- 
John Baldwin
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
user name
2008-05-03 08:11:39
On Thu, May 01, 2008 at 01:35:06PM -0400, John Baldwin
wrote:
> On Thursday 01 May 2008 11:53:04 am Juergen Lock
wrote:
> > On Thu, May 01, 2008 at 10:11:06AM -0400, John
Baldwin wrote:
> > > On Thursday 01 May 2008 06:19:51 am Juergen
Lock wrote:
> > > > On Wed, Apr 30, 2008 at 12:24:58AM
+0200, Juergen Lock wrote:
> > > > > Yeah, the amd64 kernel reuses the
same gdt to setup all cpus, causing
> > > > > kqemu to end up restoring the
interrupt stackpointer (after running
> > > > > guest code using its own cpu state)
from the tss of the last cpu,
> > > > > regardless which cpu it happened to
run on.  And that then causes the
> > > > > last cpu's (usually) idle thread's
stack to get smashed and the host
> > > > > doing multiple panics...  (Which
also explains why pinning qemu onto 
> cpu
> > > > > 1 worked on a 2-way host.)
> > > >
> > > > Hmm maybe the following is a little more
clear:  kqemu sets up its own
> > > > cpu state and has to save and restore
the original state because of 
> that,
> > > > so among other things it does an str
insn (store task register), and 
> later
> > > > an ltr insn (load task register) using
the value it got from the first
> > > > str insn.  That ltr insn loads the
selector for the tss which is stored
> > > > in the gdt, and that entry in the gdt is
different for each cpu, but 
> since
> > > > a single gdt was reused to setup the
cpus at boot (in init_secondary() 
> in
> > > > /sys/amd64/amd64/mp_machdep.c), it still
points to the tss for the last
> > > > cpu, instead of to the right one for the
cpu the ltr insn gets executed 
> on.
> > > > That is what the kqemu_tss_workaround()
in the patch `fixes'...
> > > 
> > > Perhaps kqemu shouldn't be doing str/ltr on
amd64 instead?  The things 
> i386 
> > > uses a separate tss for in the kernel
(separate stack for double faults) 
> is 
> > > handled differently on amd64 (on amd64 we
make the double fault handler 
> use 
> > > one of the IST stacks).
> > 
> > Well, kqemu uses its own gdt, tss and everything
while running guest code
> > in its monitor, so it kinda has to do the
str/ltr.s to setup its stuff, run
> > guest code, and then restore the original state of
things.  (And `restore
> > original state of things' is what failed here.)
> > 
> >  Oh and also the tss does seem to be used for the
interrupt stack on
> > amd64 too, at least thats the one that ended up
wrong and caused the panics
> > I saw...
> 
> The single TSS holds the IST pointers.  On i386 we use
a separate TSS for 
> double faults, but on amd64 a double fault uses the
same TSS but uses the IST 
> pointers from that same TSS.  The TSS also holds the
ring stack pointer for 
> when syscalls, interrupts, and traps from userland
cross from ring 3 to ring 
> 0 which is probably why you got a panic.
> 
Yeah thats where it happened.

> Because of the fact that amd64 in normal operation
never changes the task 
> register (and that the gdt isn't used quite the same
either, all the per-cpu 
> stuff is via FSBASE and GSBASE) I don't expect the
kernel to change to use a 
> per-cpu gdt or the like.  I think you will need to use
the current approach 
> of patching kqemu to fixup the tss/gdt when reloading
the task register.  You 
> might want to make it a regular part of the code rather
than a workaround as 
> a result.
> 
 Hmm okay, how would you call it then, kqemu_tss_fixup?

	Juergen
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test patch :)
user name
2008-05-06 13:19:31
On Mon, May 05, 2008 at 09:50:57AM -0400, John Baldwin
wrote:
> On Saturday 03 May 2008 09:11:39 am Juergen Lock
wrote:
> > On Thu, May 01, 2008 at 01:35:06PM -0400, John
Baldwin wrote:
> > > On Thursday 01 May 2008 11:53:04 am Juergen
Lock wrote:
> > > > On Thu, May 01, 2008 at 10:11:06AM
-0400, John Baldwin wrote:
> > > > > On Thursday 01 May 2008 06:19:51 am
Juergen Lock wrote:
> > > > > > On Wed, Apr 30, 2008 at
12:24:58AM +0200, Juergen Lock wrote:
> > > > > > > Yeah, the amd64 kernel
reuses the same gdt to setup all cpus,
> > > > > > > causing kqemu to end up
restoring the interrupt stackpointer
> > > > > > > (after running guest code
using its own cpu state) from the tss
> > > > > > > of the last cpu,
regardless which cpu it happened to run on.  And
> > > > > > > that then causes the last
cpu's (usually) idle thread's stack to
> > > > > > > get smashed and the host
doing multiple panics...  (Which also
> > > > > > > explains why pinning qemu
onto
> > >
> > > cpu
> > >
> > > > > > > 1 worked on a 2-way
host.)
> > > > > >
> > > > > > Hmm maybe the following is a
little more clear:  kqemu sets up its
> > > > > > own cpu state and has to save
and restore the original state
> > > > > > because of
> > >
> > > that,
> > >
> > > > > > so among other things it does
an str insn (store task register),
> > > > > > and
> > >
> > > later
> > >
> > > > > > an ltr insn (load task
register) using the value it got from the
> > > > > > first str insn.  That ltr insn
loads the selector for the tss which
> > > > > > is stored in the gdt, and that
entry in the gdt is different for
> > > > > > each cpu, but
> > >
> > > since
> > >
> > > > > > a single gdt was reused to
setup the cpus at boot (in
> > > > > > init_secondary()
> > >
> > > in
> > >
> > > > > >
/sys/amd64/amd64/mp_machdep.c), it still points to the tss
for the
> > > > > > last cpu, instead of to the
right one for the cpu the ltr insn gets
> > > > > > executed
> > >
> > > on.
> > >
> > > > > > That is what the
kqemu_tss_workaround() in the patch `fixes'...
> > > > >
> > > > > Perhaps kqemu shouldn't be doing
str/ltr on amd64 instead?  The
> > > > > things
> > >
> > > i386
> > >
> > > > > uses a separate tss for in the
kernel (separate stack for double
> > > > > faults)
> > >
> > > is
> > >
> > > > > handled differently on amd64 (on
amd64 we make the double fault
> > > > > handler
> > >
> > > use
> > >
> > > > > one of the IST stacks).
> > > >
> > > > Well, kqemu uses its own gdt, tss and
everything while running guest
> > > > code in its monitor, so it kinda has to
do the str/ltr.s to setup its
> > > > stuff, run guest code, and then restore
the original state of things. 
> > > > (And `restore original state of things'
is what failed here.)
> > > >
> > > >  Oh and also the tss does seem to be
used for the interrupt stack on
> > > > amd64 too, at least thats the one that
ended up wrong and caused the
> > > > panics I saw...
> > >
> > > The single TSS holds the IST pointers.  On
i386 we use a separate TSS for
> > > double faults, but on amd64 a double fault
uses the same TSS but uses the
> > > IST pointers from that same TSS.  The TSS
also holds the ring stack
> > > pointer for when syscalls, interrupts, and
traps from userland cross from
> > > ring 3 to ring 0 which is probably why you
got a panic.
> >
> > Yeah thats where it happened.
> >
> > > Because of the fact that amd64 in normal
operation never changes the task
> > > register (and that the gdt isn't used quite
the same either, all the
> > > per-cpu stuff is via FSBASE and GSBASE) I
don't expect the kernel to
> > > change to use a per-cpu gdt or the like.  I
think you will need to use
> > > the current approach of patching kqemu to
fixup the tss/gdt when
> > > reloading the task register.  You might want
to make it a regular part of
> > > the code rather than a workaround as a
result.
> >
> >  Hmm okay, how would you call it then,
kqemu_tss_fixup?
> 
> Sure.

Okay, I shall rename it at the next kqemu commit.

 Thanx,
	Juergen
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

Re: seems I finally found what upset kqemu on amd64 SMP... shared gdt! (please test new patch :)
user name
2008-05-11 11:07:48
On Thu, May 01, 2008 at 01:35:06PM -0400, John Baldwin
wrote:
> On Thursday 01 May 2008 11:53:04 am Juergen Lock
wrote:
> > On Thu, May 01, 2008 at 10:11:06AM -0400, John
Baldwin wrote:
> > > On Thursday 01 May 2008 06:19:51 am Juergen
Lock wrote:
> > > > On Wed, Apr 30, 2008 at 12:24:58AM
+0200, Juergen Lock wrote:
> > > > > Yeah, the amd64 kernel reuses the
same gdt to setup all cpus, causing
> > > > > kqemu to end up restoring the
interrupt stackpointer (after running
> > > > > guest code using its own cpu state)
from the tss of the last cpu,
> > > > > regardless which cpu it happened to
run on.  And that then causes the
> > > > > last cpu's (usually) idle thread's
stack to get smashed and the host
> > > > > doing multiple panics...  (Which
also explains why pinning qemu onto 
> cpu
> > > > > 1 worked on a 2-way host.)
> > > >
> > > > Hmm maybe the following is a little more
clear:  kqemu sets up its own
> > > > cpu state and has to save and restore
the original state because of 
> that,
> > > > so among other things it does an str
insn (store task register), and 
> later
> > > > an ltr insn (load task register) using
the value it got from the first
> > > > str insn.  That ltr insn loads the
selector for the tss which is stored
> > > > in the gdt, and that entry in the gdt is
different for each cpu, but 
> since
> > > > a single gdt was reused to setup the
cpus at boot (in init_secondary() 
> in
> > > > /sys/amd64/amd64/mp_machdep.c), it still
points to the tss for the last
> > > > cpu, instead of to the right one for the
cpu the ltr insn gets executed 
> on.
> > > > That is what the kqemu_tss_workaround()
in the patch `fixes'...
> > > 
> > > Perhaps kqemu shouldn't be doing str/ltr on
amd64 instead?  The things 
> i386 
> > > uses a separate tss for in the kernel
(separate stack for double faults) 
> is 
> > > handled differently on amd64 (on amd64 we
make the double fault handler 
> use 
> > > one of the IST stacks).
> > 
> > Well, kqemu uses its own gdt, tss and everything
while running guest code
> > in its monitor, so it kinda has to do the
str/ltr.s to setup its stuff, run
> > guest code, and then restore the original state of
things.  (And `restore
> > original state of things' is what failed here.)
> > 
> >  Oh and also the tss does seem to be used for the
interrupt stack on
> > amd64 too, at least thats the one that ended up
wrong and caused the panics
> > I saw...
> 
> The single TSS holds the IST pointers.  On i386 we use
a separate TSS for 
> double faults, but on amd64 a double fault uses the
same TSS but uses the IST 
> pointers from that same TSS.  The TSS also holds the
ring stack pointer for 
> when syscalls, interrupts, and traps from userland
cross from ring 3 to ring 
> 0 which is probably why you got a panic.
> 
> Because of the fact that amd64 in normal operation
never changes the task 
> register (and that the gdt isn't used quite the same
either, all the per-cpu 
> stuff is via FSBASE and GSBASE) I don't expect the
kernel to change to use a 
> per-cpu gdt or the like.  I think you will need to use
the current approach 
> of patching kqemu to fixup the tss/gdt when reloading
the task register.  You 
> might want to make it a regular part of the code rather
than a workaround as 
> a result.

Ok I renamed the function now.  I was mad aware of another
problem tho,
(hi Yamagi!   - running
multiple qemu instances can still panic/reboot
the box probably because the hardware does some lazy
evaluation/loading
(or maybe its a cache coherency issue?), so I thought it was
safer to use
seperate per-cpu gdts after all.  The following patch
survived a quick test
that the old version didn't (two 7.0-livefs guests running
find /dist in
fixit), tho I'm not sure about the correctness of the values
I used to
reload MSR_KGSBASE and MSR_FSBASE after lgdt() (anyone here
know offhand?
Yeah I could just save/reload them like the rest of the code
does, but if
they can be set from available data instead...)

 Here comes the patch (also at
	http://people.freebsd.org/~nox/qemu/kqemu-kmod-tss
-cpldt.patch
)

Index: Makefile
============================================================
=======
RCS file: /home/pcvs/ports/emulators/kqemu-kmod/Makefile,v
retrieving revision 1.24
diff -u -p -r1.24 Makefile
--- Makefile	11 May 2008 10:59:20 -0000	1.24
+++ Makefile	11 May 2008 15:06:08 -0000
 -7,7
+7,7 
 
 PORTNAME=	kqemu
 PORTVERSION=	1.3.0.p11
-PORTREVISION=	5
+PORTREVISION=	6
 CATEGORIES=	emulators kld
 MASTER_SITES=	http://fabrice.b
ellard.free.fr/qemu/ 
 		http://qemu.org/ 
Index: files/patch-tssworkaround
============================================================
=======
RCS file:
/home/pcvs/ports/emulators/kqemu-kmod/files/patch-tssworkaro
und,v
retrieving revision 1.2
diff -u -p -r1.2 patch-tssworkaround
--- files/patch-tssworkaround	11 May 2008 10:59:20
-0000	1.2
+++ files/patch-tssworkaround	11 May 2008 15:08:41 -0000
 -1,29
+1,70 
 Index: kqemu-freebsd.c
- -38,6 +38,11 
+ -38,6 +38,14 
  #else
  #include <machine/npx.h>
  #endif
 +#ifdef __x86_64__
++#include <sys/smp.h>
 +#include <sys/pcpu.h>
++#include <machine/pcb.h>
++#include <machine/specialreg.h>
 +#include <machine/segments.h>
 +#include <machine/tss.h>
 +#endif
  
  #include "kqemu-kernel.h"
  
- -248,6 +253,19 
+ -248,6 +256,57 
      va_end(ap);
  }
  
 +#ifdef __x86_64__
++int kqemu_cpu0gdtfixed;
++int kqemu_gdts_used;
++struct user_segment_descriptor *kqemu_gdts;
++struct region_descriptor kqemu_r_newgdt;
++extern  struct pcpu __pcpu[];
++
 +/* called with interrupts disabled */
-+void CDECL kqemu_tss_fixup(void)
++void CDECL kqemu_tss_fixup(unsigned long kerngdtbase)
 +{
 +    int gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
++    unsigned cpuid = PCPU_GET(cpuid);
++    struct user_segment_descriptor *newgdt = gdt;
 +
-+    gdt_segs[GPROC0_SEL].ssd_base = (long)
&common_tss[PCPU_GET(cpuid)];
++    if (mp_ncpus <= 1 || kerngdtbase != (unsigned
long)&gdt)
++	/* UP host or gdt already moved, nothing to do */
++	return;
++    if (cpuid) {
++	/* move gdts of all but first cpu */
++	if (!kqemu_gdts)
++	    /*
++	     * XXX gdt is allocated as
++	     *	struct user_segment_descriptor gdt[NGDT *
MAXCPU];
++	     * so it has room for the moved copies; need to
allocate at
++	     * kldload (and only free if kqemu_gdts_used is zero)
should this
++	     * change in the future
++	     */
++	    kqemu_gdts = &gdt[NGDT];
++	++kqemu_gdts_used;
++	newgdt = &kqemu_gdts[NGDT * (cpuid - 1)];
++	bcopy(&gdt, newgdt, NGDT * sizeof(gdt[0]));
++	kqemu_r_newgdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
++	kqemu_r_newgdt.rd_base = (long) newgdt;
++    } else {
++	if (kqemu_cpu0gdtfixed)
++	    return;
++	++kqemu_cpu0gdtfixed;
++    }
++    gdt_segs[GPROC0_SEL].ssd_base = (long)
&common_tss[cpuid];
 +    ssdtosyssd(&gdt_segs[GPROC0_SEL],
-+       (struct system_segment_descriptor
*)&gdt[GPROC0_SEL]);
++       (struct system_segment_descriptor
*)&newgdt[GPROC0_SEL]);
++    if (cpuid) {
++	lgdt(&kqemu_r_newgdt);
++	wrmsr(MSR_GSBASE, (u_int64_t)&__pcpu[cpuid]);
++	wrmsr(MSR_KGSBASE, curthread->td_pcb->pcb_gsbase);
++	wrmsr(MSR_FSBASE, 0);
++    }
 +    ltr(gsel_tss);
 +}
 +#endif
 -49,7
+90,7  Index: common/kernel.c
 +#ifdef __FreeBSD__
 +#ifdef __x86_64__
 +        spin_lock(&g->lock);
-+        kqemu_tss_fixup();
++        kqemu_tss_fixup(s->kernel_gdt.base);
 +        spin_unlock(&g->lock);
 +#endif
 +#endif
 -57,13
+98,13  Index: common/kernel.c
          if (s->mon_req == MON_REQ_IRQ) {
              struct kqemu_exception_regs *r;
 Index: kqemu-kernel.h
- -44,4 +44,10 
+ -48,4 +48,10 
  
  void CDECL kqemu_log(const char *fmt, ...);
  
 +#ifdef __FreeBSD__
 +#ifdef __x86_64__
-+void CDECL kqemu_tss_fixup(void);
++void CDECL kqemu_tss_fixup(unsigned long kerngdtbase);
 +#endif
 +#endif
 +
_______________________________________________
freebsd-amd64freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-amd64
To unsubscribe, send any mail to
"freebsd-amd64-unsubscribefreebsd.org"

[1-9]

about | contact  Other archives ( Real Estate discussion Medical topics )