@misc{cho2024transformerexplainerinteractivelearning,
title={Transformer Explainer: Interactive Learning of Text-Generative Models},
author={Aeree Cho and Grace C. Kim and Alexander Karpekov and Alec Helbling and Zijie J. Wang and Seongmin Lee and Benjamin Hoover and Duen Horng Chau},
year={2024},
eprint={2408.04619},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2408.04619},
}
@article{vaswani2017attention,
title={Attention is all you need},
author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
journal={Advances in neural information processing systems},
volume={30},
year={2017},
url = {https://arxiv.org/abs/1706.03762}
}
@book{tunstall2022natural,
title={Natural language processing with transformers},
author={Tunstall, Lewis and Von Werra, Leandro and Wolf, Thomas},
year={2022},
publisher={" O'Reilly Media, Inc."},
url = {https://www.oreilly.com/library/view/natural-language-processing/9781098136789/}
}
@misc{karpathy_youtube_2023_gpt,
author = {Karpathy, Andrej},
title = {Let's build GPT: from scratch, in code, spelled out},
year = {2023},
howpublished = {YouTube},
url = {https://www.youtube.com/watch?v=kCc8FmEb1nY},
}
@misc{OmarSansevieroBlogRandomTransformer,
author = {Omar Sanseviero},
title = {The Random Transformer},
year = {2024},
url = {https://osanseviero.github.io/hackerllama/blog/posts/random_transformer/},
}
@misc{TheIllustratedTransformerGlob,
author = {Jay Alammar},
title = {The Illustrated Transformer},
year = {2018},
url = {https://jalammar.github.io/illustrated-transformer/},
}
@misc{SebastianRaschkaUnderstandingAttention,
author = {Sebastian Raschka},
title = {Understanding and Coding Self-Attention, Multi-Head Attention, Cross-Attention, and Causal-Attention in LLMs},
year = {2024},
url = {https://magazine.sebastianraschka.com/p/understanding-and-coding-self-attention},
}
@article{eldan2023tinystories,
title={TinyStories: How Small Can Language Models Be and Still Speak Coherent English?},
author={Eldan, Ronen and Li, Yuanzhi},
journal={arXiv preprint arXiv:2305.07759},
year={2023},
url = {https://arxiv.org/abs/2305.07759},
}
@article{suzgun2022challenging,
title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
author={Suzgun, Mirac and Scales, Nathan and Sch{\"a}rli, Nathanael and Gehrmann, Sebastian and Tay, Yi and Chung, Hyung Won and Chowdhery, Aakanksha and Le, Quoc V and Chi, Ed H and Zhou, Denny and Wei, Jason},
journal={arXiv preprint arXiv:2210.09261},
year={2022}
}
@misc{wei2023chainofthought,
title={Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
author={Jason Wei and Xuezhi Wang and Dale Schuurmans and Maarten Bosma and Brian Ichter and Fei Xia and Ed Chi and Quoc Le and Denny Zhou},
year={2023},
eprint={2201.11903},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{khattab2023dspy,
title={DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines},
author={Omar Khattab and Arnav Singhvi and Paridhi Maheshwari and Zhiyuan Zhang and Keshav Santhanam and Sri Vardhamanan and Saiful Haq and Ashutosh Sharma and Thomas T. Joshi and Hanna Moazam and Heather Miller and Matei Zaharia and Christopher Potts},
year={2023},
eprint={2310.03714},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{singhvi2024dspy,
title={DSPy Assertions: Computational Constraints for Self-Refining Language Model Pipelines},
author={Arnav Singhvi and Manish Shetty and Shangyin Tan and Christopher Potts and Koushik Sen and Matei Zaharia and Omar Khattab},
year={2024},
eprint={2312.13382},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{Hermes-2-Pro-Mistral-7B,
url={https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B},
title={Hermes-2-Pro-Mistral-7B},
author={interstellarninja and Teknium and theemozilla and karan4d and huemin\_art},
year={2024},
}