Real-time monitoring and observability for your models and training processes.
from langtrain import Monitor

# Initialize monitoring
monitor = Monitor(
    project_name='my_project',
    experiment_name='bert_fine_tuning',
    tracking_uri='http://localhost:5000'
)

# Start monitoring training
monitor.start_training(
    model=model,
    train_data=train_dataset,
    val_data=val_dataset,
    metrics=['loss', 'accuracy', 'f1_score'],
    log_frequency=100  # Log every 100 steps
)

# Log custom metrics
monitor.log_metric('learning_rate', 0.001, step=epoch)
monitor.log_metric('batch_size', 32)
monitor.log_artifact('model_config.json', config)

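If you drive your own training loop instead of handing the model to start_training, the same log_metric call shown above can report values per step. The following is a minimal sketch under that assumption; the experiment name and the loss values are placeholders, not output from a real model.

from langtrain import Monitor

monitor = Monitor(
    project_name='my_project',
    experiment_name='manual_loop',   # placeholder experiment name
    tracking_uri='http://localhost:5000'
)

for epoch in range(3):
    # Placeholder value standing in for a real training step
    train_loss = 1.0 / (epoch + 1)
    monitor.log_metric('train_loss', train_loss, step=epoch)
    monitor.log_metric('learning_rate', 0.001 * 0.9 ** epoch, step=epoch)
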
# Performance monitoring setup
from langtrain.monitoring import PerformanceMonitor

perf_monitor = PerformanceMonitor(
    model=model,
    baseline_metrics={
        'accuracy': 0.92,
        'latency_p95': 100,  # milliseconds
        'throughput': 1000   # requests/second
    }
)

# Monitor inference performance
@perf_monitor.track_inference
def predict(inputs):
    return model.predict(inputs)

# Set up alerts for performance degradation
perf_monitor.set_alert(
    metric='accuracy',
    threshold=0.85,
    comparison='less_than',
    action='email_alert'
)

# Generate performance reports
report = perf_monitor.generate_report(
    time_range='last_7_days',
    include_trends=True
)

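The decorated predict above only produces latency and throughput samples when it is actually called, so real or replayed traffic has to flow through it before reports are meaningful. A small usage sketch, assuming the decorator and the report object from the snippet above; the sample batches are placeholders.

# Drive example traffic through the tracked entry point so the monitor
# has samples to compare against the baseline metrics.
sample_batches = [
    ["the service was great"],
    ["delivery was late", "item arrived damaged"],
]
for batch in sample_batches:
    predict(batch)

# The report generated above covers the last seven days.
print(report)
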
# System resource monitoring
from langtrain.monitoring import SystemMonitor

sys_monitor = SystemMonitor(
    track_gpu=True,
    track_memory=True,
    track_disk=True,
    track_network=True
)

# Start system monitoring
sys_monitor.start()

# Get current resource usage
resources = sys_monitor.get_current_usage()
print(f"GPU Utilization: {resources['gpu_utilization']}%")
print(f"Memory Usage: {resources['memory_usage']}%")
print(f"Disk I/O: {resources['disk_io']} MB/s")

# Set resource alerts
sys_monitor.set_alert(
    metric='gpu_memory',
    threshold=90,  # Alert at 90% GPU memory usage
    action='scale_resources'
)

# Log resource metrics
sys_monitor.log_to_dashboard(dashboard_url='http://grafana:3000')

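For long-running jobs you typically want to sample get_current_usage on an interval rather than once. A minimal polling sketch using only the calls and dictionary keys shown above; the five-iteration loop and 60-second interval are arbitrary choices, not library defaults.

import time

sys_monitor.start()

for _ in range(5):
    usage = sys_monitor.get_current_usage()
    print(f"GPU {usage['gpu_utilization']}% | RAM {usage['memory_usage']}%")
    time.sleep(60)  # sample once a minute; tune to the length of your job
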
# Data quality monitoring
from langtrain.monitoring import DataMonitor

data_monitor = DataMonitor(
    reference_data=training_data,
    feature_columns=['text_length', 'sentiment_score'],
    categorical_columns=['category', 'language']
)

# Monitor incoming data
@data_monitor.track_data_quality
def process_batch(batch_data):
    # Your data processing logic
    predictions = model.predict(batch_data)
    return predictions

# Detect data drift
drift_report = data_monitor.detect_drift(
    new_data=production_data,
    drift_methods=['ks_test', 'chi_square', 'jensen_shannon']
)

if drift_report.has_drift:
    print(f"Data drift detected in features: {drift_report.drifted_features}")

# Set up data quality alerts
data_monitor.configure_alerts(
    drift_threshold=0.1,
    quality_threshold=0.95,
    notification_channels=['email', 'slack']
)

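A common pattern is to use detect_drift as a gate in a scheduled job: block promotion (or trigger retraining) when drift appears. The sketch below reuses only the drift_report fields shown above; the non-zero exit code is an assumption for a cron-style runner, not part of the library.

import sys

drift_report = data_monitor.detect_drift(
    new_data=production_data,
    drift_methods=['ks_test', 'jensen_shannon']
)

if drift_report.has_drift:
    print(f"Blocking promotion, drift in: {drift_report.drifted_features}")
    sys.exit(1)  # non-zero exit so the scheduler marks the run as failed

print("No drift detected, safe to promote")
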
# Custom dashboard creation
from langtrain.monitoring import Dashboard

dashboard = Dashboard(name='Model Performance Dashboard')

# Add metric widgets
dashboard.add_widget(
    type='line_chart',
    title='Training Loss',
    metrics=['train_loss', 'val_loss'],
    time_range='last_24_hours'
)

dashboard.add_widget(
    type='gauge',
    title='Current Accuracy',
    metric='accuracy',
    min_value=0.0,
    max_value=1.0,
    threshold_ranges=[
        {'min': 0.0, 'max': 0.7, 'color': 'red'},
        {'min': 0.7, 'max': 0.85, 'color': 'yellow'},
        {'min': 0.85, 'max': 1.0, 'color': 'green'}
    ]
)

dashboard.add_widget(
    type='table',
    title='Model Comparison',
    data_source='model_comparison_results',
    columns=['model_name', 'accuracy', 'f1_score', 'latency']
)

# Deploy dashboard
dashboard.deploy(url='http://monitoring:8080/dashboard')

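If you track several experiments, the same widget calls can be applied in a loop to stamp out one dashboard per model. This sketch assumes Dashboard, add_widget, and deploy behave exactly as in the snippet above and that threshold_ranges is optional for gauges; the model names and URLs are placeholders.

for model_name in ['bert_fine_tuning', 'roberta_baseline']:  # placeholder names
    d = Dashboard(name=f'{model_name} Performance')
    d.add_widget(
        type='gauge',
        title='Current Accuracy',
        metric='accuracy',
        min_value=0.0,
        max_value=1.0
    )
    d.deploy(url=f'http://monitoring:8080/dashboard/{model_name}')  # placeholder URL
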