Deploy your fine-tuned models to production with OpenAI-compatible endpoints.
```python
from langtrain import LoRATrainer

# After training, push to cloud
trainer = LoRATrainer(model="llama-3.3-8b", output_dir="./model")
trainer.train("data.jsonl")

# Deploy to Langtrain Cloud
trainer.push("my-assistant")

# Your model is now available at:
# POST https://api.langtrain.xyz/v1/chat/completions
# Model: my-assistant
```
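Once the push completes, the endpoint accepts standard chat-completions requests over plain HTTP as well. A minimal sketch with `requests`, assuming the usual OpenAI-compatible JSON schema and bearer-token auth (substitute a real API key):

```python
import requests

# Placeholder key for illustration; use your real Langtrain API key
resp = requests.post(
    "https://api.langtrain.xyz/v1/chat/completions",
    headers={"Authorization": "Bearer your-langtrain-api-key"},
    json={
        "model": "my-assistant",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```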
Because the endpoint is OpenAI-compatible, the official OpenAI Python SDK works unchanged; just point `base_url` at Langtrain:

```python
import openai

# Use with OpenAI SDK
client = openai.OpenAI(
    api_key="your-langtrain-api-key",
    base_url="https://api.langtrain.xyz/v1"
)

response = client.chat.completions.create(
    model="my-assistant",
    messages=[{"role": "user", "content": "Hello!"}]
)

print(response.choices[0].message.content)

# Streaming
stream = client.chat.completions.create(
    model="my-assistant",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True
)
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="")
```
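The SDK's async client works against the same endpoint, which is handy for fanning out concurrent requests; a sketch assuming the `openai` v1+ package:

```python
import asyncio
import openai

client = openai.AsyncOpenAI(
    api_key="your-langtrain-api-key",
    base_url="https://api.langtrain.xyz/v1",
)

async def ask(prompt: str) -> str:
    response = await client.chat.completions.create(
        model="my-assistant",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

async def main():
    # Fan out several requests concurrently
    answers = await asyncio.gather(*(ask(p) for p in ["Hello!", "What can you do?"]))
    for answer in answers:
        print(answer)

asyncio.run(main())
```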
For self-hosting, export the model as a Docker image and run it anywhere with a GPU:

```bash
# Export model to Docker
langtrain export docker --model my-assistant --output ./docker

# Build and run locally
cd docker
docker build -t my-assistant:latest .
docker run -p 8000:8000 --gpus all my-assistant:latest

# Test the endpoint
curl -X POST http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "my-assistant", "messages": [{"role": "user", "content": "Hello"}]}'
```
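For a programmatic check of the local container, the same OpenAI client can target localhost. This sketch assumes the exported image serves the OpenAI-compatible schema on port 8000 and does not enforce auth locally (both assumptions; adjust if your image differs):

```python
import openai

# Point the client at the local Docker container
client = openai.OpenAI(
    api_key="unused-locally",  # assumption: the local server ignores the key
    base_url="http://localhost:8000/v1",
)

response = client.chat.completions.create(
    model="my-assistant",
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```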
For Kubernetes, the Langtrain Helm chart handles GPU scheduling and replica counts:

```bash
# Install Langtrain Helm chart
helm repo add langtrain https://charts.langtrain.xyz
helm repo update

# Deploy with GPU support (note the escaped dots in the GPU resource key)
helm install my-assistant langtrain/model \
  --set model.name=my-assistant \
  --set model.apiKey=$LANGTRAIN_API_KEY \
  --set "resources.limits.nvidia\.com/gpu=1" \
  --set replicas=3

# Expose via ingress
kubectl apply -f - <<EOF
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: my-assistant
spec:
  rules:
  - host: api.mycompany.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: my-assistant
            port:
              number: 8000
EOF
```
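To confirm the rollout from code, the official Kubernetes Python client can poll the Deployment's status. A sketch assuming the chart creates a Deployment named `my-assistant` in the `default` namespace and that your kubeconfig is set up:

```python
import time
from kubernetes import client, config

config.load_kube_config()  # uses your local kubeconfig
apps = client.AppsV1Api()

# Poll until all replicas report ready
# (assumption: Deployment name and namespace match the chart defaults)
while True:
    dep = apps.read_namespaced_deployment("my-assistant", "default")
    ready = dep.status.ready_replicas or 0
    desired = dep.spec.replicas or 0
    print(f"{ready}/{desired} replicas ready")
    if desired and ready == desired:
        break
    time.sleep(5)
```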
Langtrain Cloud deployments can auto-scale on GPU utilization, configured via the dashboard or API:

```python
# Langtrain Cloud auto-scaling (set via dashboard or API)
deployment_config = {
    "model": "my-assistant",
    "scaling": {
        "min_replicas": 1,
        "max_replicas": 10,
        "target_gpu_utilization": 70,
        "scale_up_cooldown": "2m",
        "scale_down_cooldown": "10m"
    },
    "instance_type": "gpu-a100-40gb"
}
```

Self-hosted deployments can use a standard Kubernetes HPA instead:

```bash
# Kubernetes HPA (scales on CPU; GPU-based scaling needs custom metrics)
kubectl autoscale deployment my-assistant \
  --min=1 --max=10 \
  --cpu-percent=70
```
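If you drive scaling through the API rather than the dashboard, the config above is plain JSON. A sketch with `requests`; the `/v1/deployments/...` route here is hypothetical, so check the API reference for the real path:

```python
import requests

# Hypothetical endpoint shown for illustration only
resp = requests.put(
    "https://api.langtrain.xyz/v1/deployments/my-assistant",  # assumption
    headers={"Authorization": "Bearer your-langtrain-api-key"},
    json=deployment_config,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```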
Beyond Langtrain Cloud, you can export to standard formats for other runtimes:

```python
# Export to HuggingFace format
trainer.export("./export", format="huggingface")

# Export to GGUF for llama.cpp
trainer.export("./export", format="gguf", quantization="q4_k_m")

# Export to ONNX for edge deployment
trainer.export("./export", format="onnx")

# Upload to HuggingFace Hub
trainer.push_to_hub("your-username/my-model")
```
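The HuggingFace-format export is a standard checkpoint directory, so it loads back with `transformers` for local inference; a sketch assuming the export includes tokenizer files:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the exported checkpoint from disk
tokenizer = AutoTokenizer.from_pretrained("./export")
model = AutoModelForCausalLM.from_pretrained("./export", device_map="auto")

inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```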