add example
This commit is contained in:
parent
db924a2887
commit
979c60b1a6
19 changed files with 283 additions and 0 deletions
153
example/example.typ
Normal file
153
example/example.typ
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
#import "@preview/polylux:0.3.1": *
|
||||
#import "@preview/cetz:0.1.2": canvas, plot
|
||||
#import "@preview/tablex:0.0.6": tablex, rowspanx, colspanx, hlinex, cellx
|
||||
|
||||
#import "@preview/metro:0.1.0": *
|
||||
#import units:*
|
||||
|
||||
#import "../src/tumtheme.typ": *
|
||||
|
||||
#show: tum-theme.with(
|
||||
footer: [Alex Daichendt -- Chiplet Technology and the Impact of NUMA on Applications],
|
||||
)
|
||||
|
||||
// Footnote helper: wraps `body` in 8pt text and passes it to `footnote`.
#let ftnt(body) = footnote(text(size: 8pt, body))
|
||||
|
||||
|
||||
#let title = "TUM test slides"
|
||||
|
||||
#title-slide(
|
||||
title: "Chiplet Technology and the Impact of NUMA on Applications",
|
||||
authors: "Alex Daichendt",
|
||||
chair: "IN2076 Advanced Computer Architecture"
|
||||
)
|
||||
|
||||
#let nm = unit("nano meter")
|
||||
|
||||
#slide()[
|
||||
= Intel Press Workshops June 2017 #ftnt(link("https://www.techpowerup.com/235092/intel-says-amd-epyc-processors-glued-together-in-official-slide-deck", "TechPowerUp; no primary source available"))
|
||||
#align(center, image(width: 79%, "./figures/intel_slide1.jpg"))
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= Why chiplets?
|
||||
#v(2cm)
|
||||
|
||||
#grid(
|
||||
columns: (40%, 60%),
|
||||
[
|
||||
#v(2cm)
|
||||
- Moore's Law
|
||||
- more flexibility in design
|
||||
#pause
|
||||
- low production yield for monolithic dies \
|
||||
#sym.arrow.r \$\$\$
|
||||
],
|
||||
|
||||
[
|
||||
// Labels for the plot's x axis: `x-format` below looks one up via `nm.at(int(value))`.
// This rebinds `nm` to an array; the right-hand side still uses the earlier `nm` unit value.
#let nm = ("45" + nm, "32" + nm, "28" + nm, "20" + nm, "14" + nm, "10" + nm, "7"+nm, "5" + nm)
|
||||
#figure(
|
||||
canvas(length: 1.5cm, {
|
||||
plot.plot(size: (10, 4),
|
||||
x-tick-step: 1,
|
||||
y-tick-step: 1,
|
||||
y-max: 6.0,
|
||||
x-min: 0,
|
||||
y-min: 0,
|
||||
x-grid: true,
|
||||
y-grid: true,
|
||||
y-label: "Normalized Cost",
|
||||
x-label: none,
|
||||
x-format: value => nm.at(int(value)),
|
||||
{
|
||||
plot.add(
|
||||
mark: "triangle",
|
||||
mark-size: 0.1cm,
|
||||
((0,1),(1,1.5), (2,1.7), (3,1.95), (4, 2.1), (6,3.8), (7, 4.95))
|
||||
)
|
||||
|
||||
})
|
||||
}),
|
||||
caption: [Normalized cost per chip vs. technology node, based on Naffziger et al.#ftnt(cite(form: "full", <Naffziger2021>))])
|
||||
|
||||
]
|
||||
)
|
||||
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= AMD Naples (1#super[st] Gen. EPYC) -- NUMA Topology #ftnt(link("https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56308-numa-topology-for-epyc-naples-family-processors.pdf"))
|
||||
#v(1cm)
|
||||
#figure(image(width: 85%, "./figures/naples.jpg"))
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= AMD Naples (1#super[st] Gen. EPYC)
|
||||
#figure(image(width: 60%, "./figures/naples-multilayerpackaging.jpg"),
|
||||
caption: [Multi-layer package routing, DDR (red), IO (orange), infinity-fabric (blue) #ftnt(cite(form: "full", <Naffziger2021>))])
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= AMD Rome (2#super[nd] Gen. EPYC) #ftnt(link("https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/tuning-guides/amd-epyc-7002-tg-hpc-56827.pdf"))
|
||||
#figure(image(width: 41%, "./figures/rome.jpg"))
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= Memory Access Latencies for Naples and Rome, Naffziger et al.#ftnt(cite(form: "full", <Naffziger2020>))
|
||||
#v(1cm)
|
||||
#align(center,
|
||||
image(width: 85%, "./figures/naples-vs-rome.jpg")
|
||||
)
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= Impact of NUMA on Applications
|
||||
== Emmerich et al. #ftnt(cite(form: "full", <Emmerich2018>)) -- User Space Networking Drivers
|
||||
|
||||
#figure(
|
||||
tablex(
|
||||
columns: 5,
|
||||
inset: 10pt,
|
||||
header-rows: 1,
|
||||
auto-vlines: false,
|
||||
auto-lines: false,
|
||||
caption: ["aa"],
|
||||
[Ingress NIC], [Egress NIC], [CPU], [Memory], [Throughput],
|
||||
hlinex(),
|
||||
[Node 0], [Node 0], [Node 0], [Node 0], [10.8M pps],
|
||||
[Node 0], [Node 0], [Node 0], [Node 1], [10.8M pps],
|
||||
[Node 0], [Node 0], [Node 1], [Node 0], [7.6M pps],
|
||||
[Node 0], [Node 0], [Node 1], [Node 1], cellx(fill:red)[6.6M pps],
|
||||
[Node 0], [Node 1], [Node 0], [Node 0], [7.9M pps],
|
||||
[Node 0], [Node 1], [Node 0], [Node 1], [10.0M pps],
|
||||
[Node 0], [Node 1], [Node 1], [Node 0], [8.6M pps],
|
||||
[Node 0], [Node 1], [Node 1], [Node 1], [8.1M pps],
|
||||
hlinex()
|
||||
|
||||
),
|
||||
caption: [Forwarding performance, columns indicate pinning of resources, based on Emmerich et al.#cite(<Emmerich2018>)]
|
||||
)
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= Impact of NUMA on Applications
|
||||
== Li et al. #ftnt(cite(form: "full", <Li2014>)) -- Memcached
|
||||
|
||||
#figure(
|
||||
image(width: 60%, "./figures/talesoftail.png"),
|
||||
caption: [Memcached tail latency; 2 sockets; two instances (green), one instance (blue), based on Li et al.#cite(<Li2014>)]
|
||||
)
|
||||
]
|
||||
|
||||
#slide()[
|
||||
= Conclusion
|
||||
#v(2cm)
|
||||
- CPU architecture matters
|
||||
- Chiplet technology is a fundamental part of future CPU architectures
|
||||
- Inconsistent memory access latencies are a challenge for applications
|
||||
]
|
||||
|
||||
#slide()[
|
||||
#bibliography("lib.bib")
|
||||
]
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue