APIs + Hugging Face quickstart
Plain-language picture:
- Break input into tokens → look at context → predict the next token → repeat until you hit a stop condition.
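As a minimal sketch of that loop (not how the hosted APIs work internally, just the idea; assumes transformers and torch are installed, with distilgpt2 as a stand-in model):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tok = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

ids = tok("The yield curve inverts when", return_tensors="pt").input_ids
with torch.no_grad():
    for _ in range(20):  # stop condition: a fixed token budget
        logits = model(ids).logits  # a score for every vocabulary token, at every position
        next_id = logits[0, -1].argmax()  # greedy pick: the single most likely next token
        if next_id.item() == tok.eos_token_id:  # alternative stop condition: end-of-sequence
            break
        ids = torch.cat([ids, next_id.view(1, 1)], dim=1)  # append the token and repeat
print(tok.decode(ids[0]))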
Installation:
pip install openai
Environment:
export OPENAI_API_KEY="sk-..."
Python (finance example):
from openai import OpenAI
import os
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) # or rely on env var only
resp = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "You are a concise finance tutor."},
{"role": "user", "content": "Explain the CAPM intuition in 3 bullets for a portfolio manager."}
],
temperature=0.2,
)
print(resp.choices[0].message.content)

To see how the model weighs candidate next tokens, you can also request log probabilities.
Python (logprobs example):
from openai import OpenAI
import os
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
resp = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "You are a finance assistant."},
{"role": "user", "content": "Continue the sentence: 'In CAPM, expected return equals the risk-free rate plus beta times the ...'"}
],
temperature=0.0,
logprobs=True,
top_logprobs=5,
)
choice = resp.choices[0]
print("Output:", choice.message.content)
if choice.logprobs and choice.logprobs.content:
# Show top candidate tokens with log probabilities for the first generated token
first = choice.logprobs.content[0]
for cand in first.top_logprobs:
        print(cand.token, cand.logprob)

Notes:
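- A logprob is the natural logarithm of the token's probability, so exponentiating recovers the probability itself. A quick check:

import math
print(math.exp(-0.105))  # a logprob of about -0.105 means the token had ~90% probability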
Installation (Anthropic):
pip install anthropic
Environment:
export ANTHROPIC_API_KEY="sk-ant-..."
Python (finance example):
from anthropic import Anthropic
import os
client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) # or via env only
msg = client.messages.create(
model="claude-3-5-sonnet-latest",
max_tokens=300,
messages=[
{"role": "user", "content": "Give a 1-paragraph intuition for hedging with index futures for an equity portfolio."}
],
)
print(msg.content[0].text)

Goal: a friendly command-line app that answers finance questions via an LLM with a finance-focused system message.
Install dependencies:
pip install typer rich python-dotenv openai
We’ll place the runnable script at src/lectures/day1-llms/finance_llm_cli.py.
"""
src/lectures/day1-llms/finance_llm_cli.py
"""
import os
import sys
from typing import Optional
import typer
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.table import Table
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv() # loads .env if present
console = Console()
DEFAULT_SYSTEM = (
"You are a finance research assistant specialized in financial markets. "
"Answer concisely, show clear assumptions, and prefer plain-language explanations."
)
def create_client() -> OpenAI:
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
console.print(
Panel.fit(
"Missing OPENAI_API_KEY. Set it in your environment or .env file.",
title="Configuration error",
style="bold red",
)
)
raise typer.Exit(code=1)
    return OpenAI(api_key=api_key)

app = typer.Typer(help="Finance LLM CLI — ask questions about markets")
@app.command()
def ask(
question: str = typer.Argument(..., help="Your finance question"),
model: str = typer.Option("gpt-4o-mini", help="Model ID"),
temperature: float = typer.Option(0.2, min=0.0, max=2.0, help="Sampling temperature"),
max_tokens: int = typer.Option(500, help="Max new tokens for the answer"),
system: Optional[str] = typer.Option(None, help="Override the finance system message"),
):
"""Ask a finance question and print a nicely formatted answer."""
client = create_client()
system_msg = system or DEFAULT_SYSTEM
console.rule("Finance LLM")
console.print(Panel.fit(question, title="Question", style="bold cyan"))
resp = client.chat.completions.create(
model=model,
temperature=temperature,
max_completion_tokens=max_tokens,
messages=[
{"role": "system", "content": system_msg},
{"role": "user", "content": question},
],
)
content = resp.choices[0].message.content
console.print(Panel(Markdown(content), title="Answer", border_style="green"))
# Show token usage if available
if getattr(resp, "usage", None):
usage = resp.usage
table = Table(title="Token usage")
table.add_column("prompt_tokens", justify="right")
table.add_column("completion_tokens", justify="right")
table.add_column("total_tokens", justify="right")
table.add_row(str(usage.prompt_tokens), str(usage.completion_tokens), str(usage.total_tokens))
console.print(table)
def main():
try:
app()
except KeyboardInterrupt:
console.print("\nInterrupted.")
if __name__ == "__main__":
    main()

Tip:
- Create a .env file (in the folder where you run the command, or the project root):
  OPENAI_API_KEY="sk-..."
- Load it automatically with python-dotenv (already in the code via load_dotenv()).
- Add .env to your .gitignore to avoid committing secrets:
  .env
- Optional: provide a .env.example without real keys for collaborators.
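With a key in place, a typical run might look like this (Typer exposes the single ask command at the top level, so no subcommand name is needed; the flags mirror the options defined above):

python src/lectures/day1-llms/finance_llm_cli.py "What does an inverted yield curve signal?" --temperature 0.1 --max-tokens 300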
Installation (CPU-friendly baseline; accelerate is needed for device_map="auto"):
pip install transformers torch accelerate
Notes:
- On Apple Silicon, PyTorch picks up mps automatically; no extra setup needed in recent wheels.
- Good small models to start with: sshleifer/tiny-gpt2, distilgpt2, or google/flan-t5-small for instruction tasks.

Python (local Hugging Face example):
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_id = "distilgpt2" # small, runs on CPU
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float32,
device_map="auto", # uses MPS/GPU if available, otherwise CPU
)
prompt = "Summarize the yield curve and why it can invert, in 2 lines:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Notes:
- To run larger models on modest hardware, look into quantization via bitsandbytes and AutoGPTQ models.
- A step up from the tiny baselines: TinyLlama/TinyLlama-1.1B-Chat-v1.0 runs locally with enough RAM.
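A sketch of that last option (assuming roughly 4-5 GB of free RAM; the prompt is illustrative). Chat-tuned models expect a specific prompt format, which apply_chat_template adds for you:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    device_map="auto",
)

messages = [{"role": "user", "content": "In one sentence: what does a steep yield curve imply?"}]
# Wrap the message in the model's chat template and add the assistant-turn prompt
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
outputs = model.generate(inputs, max_new_tokens=60)
# Decode only the newly generated tokens, skipping the prompt
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))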