On Jun 6, 2007, at 9:54 AM, Izumi Tsutsui wrote:
> I wrote:
>
>> Summary:
>> TX on sn0 RX on sn0
>> MD: 1026KB/s 846KB/s
>> MI: 793KB/s 854KB/s
>
> more results:
>
> MI driver with BUS_DMA_COHERENT support:
> TX on sn0 RX on sn0
> 842KB/s 888KB/s
>
> MI driver with BUS_DMA_COHERENT support and 16-byte TX DMA threshold:
> TX on sn0 RX on sn0
> 903KB/s 886KB/s
Not too bad! Please check in the bus_dma enhancements!
>
>
> ---
> Izumi Tsutsui
>
>
> Index: arch/m68k/include/bus_dma.h
>
============================================================
=======
> RCS file:
/cvsroot/src/sys/arch/m68k/include/bus_dma.h,v
> retrieving revision 1.8
> diff -u -r1.8 bus_dma.h
> --- arch/m68k/include/bus_dma.h 4 Mar 2007 06:00:04
-0000 1.8
> +++ arch/m68k/include/bus_dma.h 6 Jun 2007 16:48:19
-0000
>  -119,6 +119,7 
> struct m68k_bus_dma_segment {
> bus_addr_t ds_addr; /* DMA address */
> bus_size_t ds_len; /* length of transfer */
> + u_int _ds_flags; /* MD flags */
> };
> typedef struct m68k_bus_dma_segment bus_dma_segment_t;
>
>  -215,7 +216,7 
> int _dm_segcnt; /* number of segs this map can map
*/
> bus_size_t _dm_maxmaxsegsz; /* fixed largest possible
segment */
> bus_size_t _dm_boundary; /* don't cross this */
> - int _dm_flags; /* misc. flags */
> + u_int _dm_flags; /* misc. flags */
>
> /* Machine dependant fields: */
> bus_size_t dm_xfer_len; /* length of successful
transfer */
> Index: arch/m68k/include/pmap_motorola.h
>
============================================================
=======
> RCS file:
/cvsroot/src/sys/arch/m68k/include/pmap_motorola.h,v
> retrieving revision 1.13
> diff -u -r1.13 pmap_motorola.h
> --- arch/m68k/include/pmap_motorola.h 12 May 2007
17:43:53 -0000 1.13
> +++ arch/m68k/include/pmap_motorola.h 6 Jun 2007
16:48:19 -0000
>  -202,10 +202,8 
> #define PMAP_PREFER(foff, vap, sz,
td) pmap_prefer((foff), (vap))
> #endif
>
> -#ifdef mvme68k
> void _pmap_set_page_cacheable(struct pmap *, vaddr_t);
> void _pmap_set_page_cacheinhibit(struct pmap *,
vaddr_t);
> int _pmap_page_is_cacheable(struct pmap *, vaddr_t);
> -#endif
>
> #endif /* !_M68K_PMAP_MOTOROLA_H_ */
> Index: arch/m68k/m68k/bus_dma.c
>
============================================================
=======
> RCS file: /cvsroot/src/sys/arch/m68k/m68k/bus_dma.c,v
> retrieving revision 1.23
> diff -u -r1.23 bus_dma.c
> --- arch/m68k/m68k/bus_dma.c 2 Jun 2007 11:13:45
-0000 1.23
> +++ arch/m68k/m68k/bus_dma.c 6 Jun 2007 16:48:19 -0000
>  -141,23 +141,30 
> bus_size_t sgsize;
> bus_addr_t curaddr, lastaddr, baddr, bmask;
> vaddr_t vaddr = (vaddr_t)buf;
> - int seg;
> + int seg, cacheable, coherent;
> + pmap_t pmap;
> bool rv;
>
> + coherent = BUS_DMA_COHERENT;
> lastaddr = *lastaddrp;
> bmask = ~(map->_dm_boundary - 1);
> + if (!VMSPACE_IS_KERNEL_P(vm))
> + pmap = vm_map_pmap(&vm->vm_map);
> + else
> + pmap = pmap_kernel();
>
> for (seg = *segp; buflen > 0 ; ) {
> /*
> * Get the physical address for this segment.
> */
> - if (!VMSPACE_IS_KERNEL_P(vm))
> - rv = pmap_extract(vm_map_pmap(&vm->vm_map),
> - vaddr, &curaddr);
> - else
> - rv = pmap_extract(pmap_kernel(), vaddr,
&curaddr);
> + rv = pmap_extract(pmap, vaddr, &curaddr);
> KASSERT(rv);
>
> + cacheable = _pmap_page_is_cacheable(pmap, vaddr);
> +
> + if (cacheable)
> + coherent = 0;
> +
> /*
> * Compute the segment size, and adjust counts.
> */
>  -181,6 +188,8 
> if (first) {
> map->dm_segs[seg].ds_addr = curaddr;
> map->dm_segs[seg].ds_len = sgsize;
> + map->dm_segs[seg]._ds_flags =
> + cacheable ? 0 : BUS_DMA_COHERENT;
> first = 0;
> } else {
> if (curaddr == lastaddr &&
>  -195,6 +204,8 
> break;
> map->dm_segs[seg].ds_addr = curaddr;
> map->dm_segs[seg].ds_len = sgsize;
> + map->dm_segs[seg]._ds_flags =
> + cacheable ? 0 : BUS_DMA_COHERENT;
> }
> }
>
>  -205,6 +216,9 
>
> *segp = seg;
> *lastaddrp = lastaddr;
> + map->_dm_flags &= ~BUS_DMA_COHERENT;
> + /* BUS_DMA_COHERENT is set only if all segments are
uncached */
> + map->_dm_flags |= coherent;
>
> /*
> * Did we fit?
>  -408,6 +422,7 
> map->dm_maxsegsz = map->_dm_maxmaxsegsz;
> map->dm_mapsize = 0;
> map->dm_nsegs = 0;
> + map->_dm_flags &= ~BUS_DMA_COHERENT;
> }
>
> /*
>  -426,6 +441,7 
> #if defined(M68040) || defined(M68060)
> bus_addr_t p, e, ps, pe;
> bus_size_t seglen;
> + bus_dma_segment_t *seg;
> int i;
> #endif
>
>  -438,6 +454,10 
> #endif
>
> #if defined(M68040) || defined(M68060)
> + /* If the whole DMA map is uncached, do nothing. */
> + if ((map->_dm_flags & BUS_DMA_COHERENT) != 0)
> + return;
> +
> /* Short-circuit for unsupported `ops' */
> if ((ops & (BUS_DMASYNC_PREREAD |
BUS_DMASYNC_PREWRITE)) == 0)
> return;
>  -446,9 +466,10 
> * flush/purge the cache.
> */
> for (i = 0; i < map->dm_nsegs && len !=
0; i++) {
> - if (map->dm_segs[i].ds_len <= offset) {
> + seg = &map->dm_segs[i];
> + if (seg->ds_len <= offset) {
> /* Segment irrelevant - before requested offset */
> - offset -= map->dm_segs[i].ds_len;
> + offset -= seg->ds_len;
> continue;
> }
>
>  -457,11 +478,15 
> * each segment until we have exhausted the
> * length.
> */
> - seglen = map->dm_segs[i].ds_len - offset;
> + seglen = seg->ds_len - offset;
> if (seglen > len)
> seglen = len;
>
> - ps = map->dm_segs[i].ds_addr + offset;
> + /* Ignore cache-inhibited segments */
> + if ((seg->_ds_flags & BUS_DMA_COHERENT) !=
0)
> + continue;
> +
> + ps = seg->ds_addr + offset;
> pe = ps + seglen;
>
> if (ops & BUS_DMASYNC_PREWRITE) {
>  -655,10 +680,20 
> pmap_enter(pmap_kernel(), va, addr,
> VM_PROT_READ | VM_PROT_WRITE,
> VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED);
> +
> + /* Cache-inhibit the page if necessary */
> + if ((flags & BUS_DMA_COHERENT) != 0)
> + _pmap_set_page_cacheinhibit(pmap_kernel(), va);
> +
> + segs[curseg]._ds_flags &= ~BUS_DMA_COHERENT;
> + segs[curseg]._ds_flags |= (flags &
BUS_DMA_COHERENT);
> }
> }
> pmap_update(pmap_kernel());
>
> + if ((flags & BUS_DMA_COHERENT) != 0)
> + TBIAS();
> +
> return 0;
> }
>
>  -669,6 +704,8 
> void
> _bus_dmamem_unmap(bus_dma_tag_t t, void *kva, size_t
size)
> {
> + vaddr_t va;
> + size_t s;
>
> #ifdef DIAGNOSTIC
> if ((u_long)kva & PGOFSET)
>  -677,6 +714,15 
>
> size = round_page(size);
>
> + /*
> + * Re-enable cacheing on the range
> + * XXXSCW: There should be some way to indicate that
the pages
> + * were mapped DMA_MAP_COHERENT in the first
place...
> + */
> + for (s = 0, va = (vaddr_t)kva; s < size;
> + s += PAGE_SIZE, va += PAGE_SIZE)
> + _pmap_set_page_cacheable(pmap_kernel(), va);
> +
> pmap_remove(pmap_kernel(), (vaddr_t)kva, (vaddr_t)kva
+ size);
> pmap_update(pmap_kernel());
> uvm_km_free(kernel_map, (vaddr_t)kva, size,
UVM_KMF_VAONLY);
>  -707,6 +753,10 
> continue;
> }
>
> + /*
> + * XXXSCW: What about BUS_DMA_COHERENT ??
> + */
> +
> return m68k_btop((char *)segs[i].ds_addr + off);
> }
>
> Index: arch/m68k/m68k/pmap_motorola.c
>
============================================================
=======
> RCS file:
/cvsroot/src/sys/arch/m68k/m68k/pmap_motorola.c,v
> retrieving revision 1.30
> diff -u -r1.30 pmap_motorola.c
> --- arch/m68k/m68k/pmap_motorola.c 18 May 2007 01:46:40
-0000 1.30
> +++ arch/m68k/m68k/pmap_motorola.c 6 Jun 2007 16:48:20
-0000
>  -2848,8 +2848,6 
> (void)cachectl1(0x80000004, va, len, p);
> }
>
> -#ifdef mvme68k
> -
> void
> _pmap_set_page_cacheable(pmap_t pmap, vaddr_t va)
> {
>  -2905,8 +2903,6 
> return (pmap_pte_ci(pmap_pte(pmap, va)) == 0) ? 1 :
0;
> }
>
> -#endif /* mvme68k */
> -
> #ifdef DEBUG
> /*
> * pmap_pvdump:
-- thorpej
|
thorpej shagadelic.org wrote:
> >> Summary:
> >> TX on sn0 RX on sn0
> >> MD: 1026KB/s 846KB/s
> >> MI: 793KB/s 854KB/s
> >
> > more results:
> >
> > MI driver with BUS_DMA_COHERENT support:
> > TX on sn0 RX on sn0
> > 842KB/s 888KB/s
> >
> > MI driver with BUS_DMA_COHERENT support and 16-byte TX DMA threshold:
> > TX on sn0 RX on sn0
> > 903KB/s 886KB/s
>
> Not too bad! Please check in the bus_dma enhancements!
Thanks, I'll do so soon.
One more (last?) result here:
MI driver with BUS_DMA_COHERENT support, 16-byte TX DMA threshold,
and properly inlined DCFL/DCFP/DCPL/DCPP cache flush/purge ops:
TX on sn0 RX on sn0
951KB/s 918KB/s
I think it's acceptable enough to replace the MD one.
---
# dmesg|grep sn0
sn0 at obio0: integrated SONIC Ethernet adapter
sn0: Ethernet address 08:00:07:9f:07:c6
# ./ttcp -rs
ttcp-r: buflen=8192, nbuf=2048, align=16384/0, port=5001
tcp
ttcp-r: socket
ttcp-r: accept from 192.168.20.1
ttcp-r: 16777216 bytes in 17.85 real seconds = 917.69 KB/sec
+++
ttcp-r: 2049 I/O calls, msec/call = 8.92, calls/sec =
114.77
ttcp-r: 0.0user 17.7sys 0:17real 99% 0i+0d 0maxrss 0+2pf
0+0csw
# ./ttcp -ts 192.168.20.1
ttcp-t: buflen=8192, nbuf=2048, align=16384/0, port=5001
tcp -> 192.168.20.1
ttcp-t: socket
ttcp-t: connect
ttcp-t: 16777216 bytes in 17.23 real seconds = 951.07 KB/sec
+++
ttcp-t: 2048 I/O calls, msec/call = 8.61, calls/sec =
118.88
ttcp-t: 0.1user 16.8sys 0:17real 98% 0i+0d 0maxrss 0+4098pf
0+0csw
#
---
Izumi Tsutsui
---
Index: arch/m68k/include/cacheops.h
============================================================
=======
RCS file: /cvsroot/src/sys/arch/m68k/include/cacheops.h,v
retrieving revision 1.11
diff -u -r1.11 cacheops.h
--- arch/m68k/include/cacheops.h 11 Dec 2005 12:17:53
-0000 1.11
+++ arch/m68k/include/cacheops.h 7 Jun 2007 12:33:24 -0000
 -83,20
+83,12 
#elif defined(M68040)
-#define DCFA() DCFA_40()
-#define DCFL(pa) DCFL_40((pa))
-#define DCFP(pa) DCFP_40((pa))
#define DCIA() DCIA_40()
#define DCIAS(pa) DCIAS_40((pa))
#define DCIS() DCIS_40()
#define DCIU() DCIU_40()
-#define DCPA() DCPA_40()
-#define DCPL(pa) DCPL_40((pa))
-#define DCPP(pa) DCPP_40((pa))
#define ICIA() ICIA_40()
#define ICPA() ICPA_40()
-#define ICPL(pa) ICPL_40((pa))
-#define ICPP(pa) ICPP_40((pa))
#define PCIA() PCIA_40()
#define TBIA() TBIA_40()
#define TBIAS() TBIAS_40()
 -129,20
+121,12 
#else /* M68K_CACHEOPS_NTYPES == 1 */
-#define DCFA() _DCFA()
-#define DCFL(pa) _DCFL((pa))
-#define DCFP(pa) _DCFP((pa))
#define DCIA() _DCIA()
#define DCIAS(pa) _DCIAS((pa))
#define DCIS() _DCIS()
#define DCIU() _DCIU()
-#define DCPA() _DCPA()
-#define DCPL(pa) _DCPL((pa))
-#define DCPP(pa) _DCPP((pa))
#define ICIA() _ICIA()
#define ICPA() _ICPA()
-#define ICPL(pa) _ICPL((pa))
-#define ICPP(pa) _ICPP((pa))
#define PCIA() _PCIA()
#define TBIA() _TBIA()
#define TBIAS() _TBIAS()
 -151,24
+135,34 
#endif /* M68K_CACHEOPS_NTYPES == 1 */
-void _DCFA(void);
-void _DCFL(paddr_t);
-void _DCFP(paddr_t);
void _DCIA(void);
void _DCIAS(paddr_t);
void _DCIS(void);
void _DCIU(void);
-void _DCPA(void);
-void _DCPL(paddr_t);
-void _DCPP(paddr_t);
void _ICIA(void);
void _ICPA(void);
-void _ICPL(paddr_t);
-void _ICPP(paddr_t);
void _PCIA(void);
void _TBIA(void);
void _TBIAS(void);
void _TBIAU(void);
void _TBIS(vaddr_t);
+
+#if defined(M68040) || defined(M68060)
+
+/*
+ * These cache ops are identical between M68040 and M68060
+ * and not available on M68020 and M68030 so no need to
check cputype.
+ */
+#define DCFA() DCFA_40()
+#define DCPA() DCPA_40()
+#define ICPL(pa) ICPL_40(pa)
+#define ICPP(pa) ICPP_40(pa)
+#define DCPL(pa) DCPL_40(pa)
+#define DCPP(pa) DCPP_40(pa)
+#define DCFL(pa) DCFL_40(pa)
+#define DCFP(pa) DCFP_40(pa)
+
+#endif
+
#endif /* _M68K_CACHEOPS_H_ */
Index: arch/m68k/include/cacheops_60.h
============================================================
=======
RCS file:
/cvsroot/src/sys/arch/m68k/include/cacheops_60.h,v
retrieving revision 1.7
diff -u -r1.7 cacheops_60.h
--- arch/m68k/include/cacheops_60.h 16 Feb 2006 20:17:13
-0000 1.7
+++ arch/m68k/include/cacheops_60.h 7 Jun 2007 12:33:24
-0000
 -156,69
+156,11 
__asm volatile (" .word 0xf478;"); /* cpusha dc
*/
}
-static __inline void __attribute__((__unused__))
-DCFA_60(void)
-{
- __asm volatile (" .word 0xf478;"); /* cpusha dc
*/
-}
-
-/* invalidate instruction physical cache line */
-static __inline void __attribute__((__unused__))
-ICPL_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf488;" : :
"a" (r_pa)); /* cinvl ic,%a0 */
-}
-
-/* invalidate instruction physical cache page */
-static __inline void __attribute__((__unused__))
-ICPP_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf490;" : :
"a" (r_pa)); /* cinvp ic,%a0 */
-}
-
-/* invalidate data physical cache line */
-static __inline void __attribute__((__unused__))
-DCPL_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf448;" : :
"a" (r_pa)); /* cinvl dc,%a0 */
-}
-
-/* invalidate data physical cache page */
-static __inline void __attribute__((__unused__))
-DCPP_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf450;" : :
"a" (r_pa)); /* cinvp dc,%a0 */
-}
-
-/* invalidate data physical all */
-static __inline void __attribute__((__unused__))
-DCPA_60(void)
-{
- __asm volatile (" .word 0xf458;"); /* cinva dc
*/
-}
-
-/* data cache flush line */
-static __inline void __attribute__((__unused__))
-DCFL_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf468;" : :
"a" (r_pa)); /* cpushl dc,%a0 */
-}
-
-/* data cache flush page */
-static __inline void __attribute__((__unused__))
-DCFP_60(paddr_t pa)
-{
- register paddr_t r_pa __asm("%a0") = pa;
-
- __asm volatile (" .word 0xf470;" : :
"a" (r_pa)); /* cpushp dc,%a0 */
-}
+#define DCFA_60() DCFA_40()
+#define DCPA_60() DCPA_40()
+#define ICPL_60(pa) ICPL_40(pa)
+#define ICPP_60(pa) ICPP_40(pa)
+#define DCPL_60(pa) DCPL_40(pa)
+#define DCPP_60(pa) DCPP_40(pa)
+#define DCFL_60(pa) DCFL_40(pa)
+#define DCFP_60(pa) DCFP_40(pa)
Index: arch/m68k/m68k/cacheops.c
============================================================
=======
RCS file: /cvsroot/src/sys/arch/m68k/m68k/cacheops.c,v
retrieving revision 1.11
diff -u -r1.11 cacheops.c
--- arch/m68k/m68k/cacheops.c 22 Jul 2006 06:36:06
-0000 1.11
+++ arch/m68k/m68k/cacheops.c 7 Jun 2007 12:33:24 -0000
 -404,156
+404,3 
#endif
}
}
-
-#if defined(M68040) || defined(M68060)
-void
-_DCFA(void)
-{
-
- switch (cputype) {
-#ifdef M68040
- case CPU_68040:
- DCFA_40();
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCFA_60();
- break;
-#endif
- }
-}
-
-void
-_DCPA(void)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- DCPA_40();
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCPA_60();
- break;
-#endif
- }
-}
-
-void
-_ICPL(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- ICPL_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- ICPL_60(pa);
- break;
-#endif
- }
-}
-
-void
-_ICPP(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- ICPP_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- ICPP_60(pa);
- break;
-#endif
- }
-}
-
-void
-_DCPL(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- DCPL_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCPL_60(pa);
- break;
-#endif
- }
-}
-
-void
-_DCPP(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- DCPP_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCPP_60(pa);
- break;
-#endif
- }
-}
-
-void
-_DCFL(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- DCFL_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCFL_60(pa);
- break;
-#endif
- }
-}
-
-void
-_DCFP(paddr_t pa)
-{
-
- switch (cputype) {
- default:
-#ifdef M68040
- case CPU_68040:
- DCFP_40(pa);
- break;
-#endif
-#ifdef M68060
- case CPU_68060:
- DCFP_60(pa);
- break;
-#endif
- }
-}
-#endif /* M68040 || M68060 */
|