containerization
Installation
SKILL.md
Containerization & Kubernetes
Production-grade container orchestration for data engineering workloads with Docker and Kubernetes.
Quick Start
# Dockerfile for PySpark data application
# The base image is pinned to the Debian bookworm variant: the plain
# `3.12-slim` tag floats to whichever Debian release is current, and the
# openjdk-17 package installed below is only guaranteed to exist on bookworm.
FROM python:3.12-slim-bookworm
# Install Java for Spark.
# --no-install-recommends keeps optional packages out of the image, and the
# apt caches are removed in the same layer so they never persist in it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        openjdk-17-jdk-headless \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths resolve against /app.
WORKDIR /app
# Copy only the dependency manifest before installing, so this layer and the
# install layer below stay cached until requirements.txt itself changes.
COPY requirements.txt ./
# --no-cache-dir stops pip from keeping its download cache in the image layer.
RUN pip install --no-cache-dir --requirement requirements.txt