add example

This commit is contained in:
Alexander Daichendt 2023-11-15 12:47:45 +01:00
parent db924a2887
commit 979c60b1a6
19 changed files with 283 additions and 0 deletions

BIN
example/example.pdf Normal file

Binary file not shown.

153
example/example.typ Normal file
View file

@ -0,0 +1,153 @@
#import "@preview/polylux:0.3.1": *
#import "@preview/cetz:0.1.2": canvas, plot
#import "@preview/tablex:0.0.6": tablex, rowspanx, colspanx, hlinex, cellx
#import "@preview/metro:0.1.0": *
#import units:*
#import "../src/tumtheme.typ": *
// Apply the slide theme to the whole document (`tum-theme` is presumably exported by ../src/tumtheme.typ — verify);
// the footer content is handed to the theme.
#show: tum-theme.with(
footer: [Alex Daichendt -- Chiplet Technology and the Impact of NUMA on Applications],
)
// Footnote helper: wraps `body` in 8pt text inside a footnote.
#let ftnt(body) = footnote(text(size: 8pt, body))
// NOTE(review): `title` is not referenced anywhere else in this file — confirm whether it is still needed.
#let title = "TUM test slides"
// Title slide with talk title, author and chair/course line.
#title-slide(
title: "Chiplet Technology and the Impact of NUMA on Applications",
authors: "Alex Daichendt",
chair: "IN2076 Advanced Computer Architecture"
)
// Unit built with `unit` (presumably from the metro import above — verify);
// it is appended to the node sizes that label the x axis of the cost plot further down.
#let nm = unit("nano meter")
// Slide: Intel press-workshop slide shown as a centered image; the heading footnote links to the TechPowerUp article.
#slide()[
= Intel Press Workshops June 2017 #ftnt(link("https://www.techpowerup.com/235092/intel-says-amd-epyc-processors-glued-together-in-official-slide-deck", "TechPowerUp; no primary source available"))
#align(center, image(width: 79%, "./figures/intel_slide1.jpg"))
]
// Slide: motivation for chiplets — bullet list in the left column (40%), cost plot in the right column (60%).
#slide()[
= Why chiplets?
#v(2cm)
#grid(
columns: (40%, 60%),
[
#v(2cm)
- Moore's Law
- more flexibility in design
#pause
- low production yield for monolithic dies \
#sym.arrow.r \$\$\$
],
[
// Tick labels for the x axis: each node size joined with the file-level `nm` unit.
// This `let` shadows the outer `nm` for the rest of this content block.
#let nm = ("45" + nm, "32" + nm, "28" + nm, "20" + nm, "14" + nm, "10" + nm, "7"+nm, "5" + nm)
#figure(
canvas(length: 1.5cm, {
plot.plot(size: (10, 4),
x-tick-step: 1,
y-tick-step: 1,
y-max: 6.0,
x-min: 0,
y-min: 0,
x-grid: true,
y-grid: true,
y-label: "Normalized Cost",
x-label: none,
// Map each tick value to the label at that index of the `nm` array above.
x-format: value => nm.at(int(value)),
{
plot.add(
mark: "triangle",
mark-size: 0.1cm,
// (x, y) samples; x is the index into the tick labels.
// NOTE(review): there is no sample at x = 5 — confirm this gap is intentional.
((0,1),(1,1.5), (2,1.7), (3,1.95), (4, 2.1), (6,3.8), (7, 4.95))
)
})
}),
caption: [Normalized cost per chip vs. technology node, based on Naffziger et al.#ftnt(cite(form: "full", <Naffziger2021>))])
]
)
]
// Slide: NUMA topology figure for AMD Naples; the heading footnote links to AMD's topology specification PDF.
#slide()[
= AMD Naples (1#super[st] Gen. EPYC) -- NUMA Topology #ftnt(link("https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56308-numa-topology-for-epyc-naples-family-processors.pdf"))
#v(1cm)
#figure(image(width: 85%, "./figures/naples.jpg"))
]
// Slide: package-routing figure for AMD Naples; the caption carries the full citation as a footnote.
#slide()[
= AMD Naples (1#super[st] Gen. EPYC)
#figure(image(width: 60%, "./figures/naples-multilayerpackaging.jpg"),
caption: [Multi-layer package routing, DDR (red), IO (orange), infinity-fabric (blue) #ftnt(cite(form: "full", <Naffziger2021>))])
]
// Slide: AMD Rome figure; the heading footnote links to AMD's EPYC 7002 HPC tuning guide PDF.
#slide()[
= AMD Rome (2#super[nd] Gen. EPYC) #ftnt(link("https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/tuning-guides/amd-epyc-7002-tg-hpc-56827.pdf"))
#figure(image(width: 41%, "./figures/rome.jpg"))
]
// Slide: centered comparison image of memory access latencies for Naples and Rome; the heading cites Naffziger2020 in a footnote.
#slide()[
= Memory Access Latencies for Naples and Rome, Naffziger et al.#ftnt(cite(form: "full", <Naffziger2020>))
#v(1cm)
#align(center,
image(width: 85%, "./figures/naples-vs-rome.jpg")
)
]
// Slide: forwarding throughput for different NUMA pinnings of NICs, CPU and memory (Emmerich et al.).
#slide()[
= Impact of NUMA on Applications
== Emmerich et al. #ftnt(cite(form: "full", <Emmerich2018>)) -- User Space Network Drivers
#figure(
tablex(
columns: 5,
inset: 10pt,
header-rows: 1,
auto-vlines: false,
auto-lines: false,
// Header row, followed by a horizontal rule separating it from the data rows.
[Ingress NIC], [Egress NIC], [CPU], [Memory], [Throughput],
hlinex(),
[Node 0], [Node 0], [Node 0], [Node 0], [10.8M pps],
[Node 0], [Node 0], [Node 0], [Node 1], [10.8M pps],
[Node 0], [Node 0], [Node 1], [Node 0], [7.6M pps],
// Lowest throughput in the table, highlighted with a red fill.
[Node 0], [Node 0], [Node 1], [Node 1], cellx(fill:red)[6.6M pps],
[Node 0], [Node 1], [Node 0], [Node 0], [7.9M pps],
[Node 0], [Node 1], [Node 0], [Node 1], [10.0M pps],
[Node 0], [Node 1], [Node 1], [Node 0], [8.6M pps],
[Node 0], [Node 1], [Node 1], [Node 1], [8.1M pps],
hlinex()
),
// The caption belongs to the figure, not to the table call.
caption: [Forwarding performance, columns indicate pinning of resources, based on Emmerich et al.#cite(<Emmerich2018>)]
)
]
// Slide: Memcached tail-latency figure from Li et al.; full citation in the heading footnote, short citation in the caption.
#slide()[
= Impact of NUMA on Applications
== Li et al. #ftnt(cite(form: "full", <Li2014>)) -- Memcached
#figure(
image(width: 60%, "./figures/talesoftail.png"),
caption: [Memcached tail latency; 2 sockets; two instances (green), one instance (blue), based on Li et al.#cite(<Li2014>)]
)
]
// Slide: closing summary as a three-item bullet list.
#slide()[
= Conclusion
#v(2cm)
- CPU architecture matters
- Chiplet technology is a fundamental part of future CPU architectures
- Inconsistent memory access latencies are a challenge for applications
]
// Slide: bibliography built from lib.bib, which supplies the keys cited on the slides above.
#slide()[
#bibliography("lib.bib")
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 181 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 358 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 522 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 490 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 MiB

BIN
example/figures/naples.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 180 KiB

BIN
example/figures/naples.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

BIN
example/figures/rome.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 514 KiB

BIN
example/figures/rome.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

49
example/lib.bib Normal file
View file

@ -0,0 +1,49 @@
@inproceedings{Naffziger2021,
doi = {10.1109/isca52012.2021.00014},
url = {https://doi.org/10.1109/isca52012.2021.00014},
year = {2021},
month = jun,
publisher = {{IEEE}},
author = {Samuel Naffziger and Noah Beck and Thomas Burd and Kevin Lepak and Gabriel H. Loh and Mahesh Subramony and Sean White},
title = {Pioneering Chiplet Technology and Design for the {AMD} {EPYC}™ and Ryzen™ Processor Families : Industrial Product},
booktitle = {2021 {ACM}/{IEEE} 48th Annual International Symposium on Computer Architecture ({ISCA})}
}
@inproceedings{Naffziger2020,
doi = {10.1109/isscc19947.2020.9063103},
url = {https://doi.org/10.1109/isscc19947.2020.9063103},
year = {2020},
month = feb,
publisher = {{IEEE}},
author = {Samuel Naffziger and Kevin Lepak and Milam Paraschou and Mahesh Subramony},
title = {2.2 {AMD} Chiplet Architecture for High-Performance Server and Desktop Products},
booktitle = {2020 {IEEE} International Solid-State Circuits Conference - ({ISSCC})}
}
@inproceedings{Emmerich2018,
author = {Emmerich, Paul and Pudelko, Maximilian and Bauer, Simon and Carle, Georg},
title = {User Space Network Drivers},
year = {2018},
isbn = {9781450355858},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3232755.3232767},
doi = {10.1145/3232755.3232767},
abstract = {The rise of user space packet processing frameworks like DPDK and netmap makes low-level code more accessible to developers and researchers. Previously, driver code was hidden in the kernel and rarely modified---or even looked at---by developers working at higher layers. These barriers are gone nowadays, yet developers still treat user space drivers as black-boxes magically accelerating applications. We want to change this: every researcher building network applications should understand the intricacies of the underlying drivers, especially if they impact performance. We present ixy, a user space network driver designed for simplicity and educational purposes. Ixy focuses on the bare essentials of user space packet processing: a packet forwarder including the whole NIC driver uses less than 1000 lines of C code. Our code is available as free and open source under the BSD license at https://github.com/emmericp/ixy.},
booktitle = {Proceedings of the Applied Networking Research Workshop},
pages = {91--93},
numpages = {3},
location = {Montreal, QC, Canada},
series = {ANRW '18}
}
@inproceedings{Li2014,
doi = {10.1145/2670979.2670988},
url = {https://doi.org/10.1145/2670979.2670988},
year = {2014},
month = nov,
publisher = {{ACM}},
author = {Jialin Li and Naveen Kr. Sharma and Dan R. K. Ports and Steven D. Gribble},
title = {Tales of the Tail},
booktitle = {Proceedings of the {ACM} Symposium on Cloud Computing}
}