submission_id: meta-llama-llama-guard-3-8b_v4
developer_uid: chai_backend_admin
alignment_samples: 12205
alignment_score: -0.9397576479976646
best_of: 1
celo_rating: 1117.31
display_name: meta-llama-llama-guard-3-8b_v4
formatter: {'memory_template': "{bot_name}'s Persona: {memory}\n####\n", 'prompt_template': '{prompt}\n<START>\n', 'bot_template': '{bot_name}: {message}\n', 'user_template': '{user_name}: {message}\n', 'response_template': '{bot_name}:', 'truncate_by_message': False}
generation_params: {'temperature': 1.0, 'top_p': 1.0, 'min_p': 0.0, 'top_k': 40, 'presence_penalty': 0.0, 'frequency_penalty': 0.0, 'stopping_words': ['\n'], 'max_input_tokens': 2048, 'best_of': 1, 'max_output_tokens': 64}
gpu_counts: {'NVIDIA RTX A5000': 1}
is_internal_developer: True
language_model: meta-llama/Llama-Guard-3-8B
latencies: [{'batch_size': 1, 'throughput': 0.7616703414112728, 'latency_mean': 1.3128387999534608, 'latency_p50': 1.317323088645935, 'latency_p90': 1.4267670869827271}, {'batch_size': 4, 'throughput': 1.5858351912660584, 'latency_mean': 2.513174532651901, 'latency_p50': 2.5200756788253784, 'latency_p90': 2.779255986213684}, {'batch_size': 5, 'throughput': 1.716309011651449, 'latency_mean': 2.8982256031036377, 'latency_p50': 2.8970471620559692, 'latency_p90': 3.3062103271484373}, {'batch_size': 8, 'throughput': 1.9739318552433565, 'latency_mean': 4.035021967887879, 'latency_p50': 4.025168776512146, 'latency_p90': 4.564210534095764}, {'batch_size': 10, 'throughput': 2.0780307455361826, 'latency_mean': 4.787236242294312, 'latency_p50': 4.783550500869751, 'latency_p90': 5.400000977516174}, {'batch_size': 12, 'throughput': 2.0790226934208738, 'latency_mean': 5.720153164863587, 'latency_p50': 5.746929049491882, 'latency_p90': 6.348146557807922}, {'batch_size': 15, 'throughput': 2.084124160712627, 'latency_mean': 7.0969349694252015, 'latency_p50': 7.137486100196838, 'latency_p90': 7.828801965713501}]
max_input_tokens: 2048
max_output_tokens: 64
model_architecture: LlamaForCausalLM
model_group: meta-llama/Llama-Guard-3
model_name: meta-llama-llama-guard-3-8b_v4
model_num_parameters: 8030261248.0
model_repo: meta-llama/Llama-Guard-3-8B
model_size: 8B
num_battles: 12204
num_wins: 4149
propriety_score: 0.7394296951819076
propriety_total_count: 1017.0
ranking_group: single
status: inactive
submission_type: basic
throughput_3p7s: 1.92
timestamp: 2024-09-06T01:11:16+00:00
us_pacific_date: 2024-09-05
win_ratio: 0.33997050147492625
Download Preference Data
Resubmit model
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name meta-llama-llama-guard-3-8b-v4-mkmlizer
Waiting for job on meta-llama-llama-guard-3-8b-v4-mkmlizer to finish
meta-llama-llama-guard-3-8b-v4-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ _____ __ __ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ /___/ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ Version: 0.10.1 ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ https://mk1.ai ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ The license key for the current software has been verified as ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ belonging to: ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ Chai Research Corp. ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ║ ║
meta-llama-llama-guard-3-8b-v4-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
meta-llama-llama-guard-3-8b-v4-mkmlizer: Downloaded to shared memory in 33.491s
meta-llama-llama-guard-3-8b-v4-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmpqjubpykj, device:0
meta-llama-llama-guard-3-8b-v4-mkmlizer: Saving flywheel model at /dev/shm/model_cache
meta-llama-llama-guard-3-8b-v4-mkmlizer: quantized model in 25.727s
meta-llama-llama-guard-3-8b-v4-mkmlizer: Processed model meta-llama/Llama-Guard-3-8B in 59.218s
meta-llama-llama-guard-3-8b-v4-mkmlizer: creating bucket guanaco-mkml-models
meta-llama-llama-guard-3-8b-v4-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
meta-llama-llama-guard-3-8b-v4-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4
meta-llama-llama-guard-3-8b-v4-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4/config.json
meta-llama-llama-guard-3-8b-v4-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4/special_tokens_map.json
meta-llama-llama-guard-3-8b-v4-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4/tokenizer_config.json
meta-llama-llama-guard-3-8b-v4-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4/tokenizer.json
meta-llama-llama-guard-3-8b-v4-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/meta-llama-llama-guard-3-8b-v4/flywheel_model.0.safetensors
meta-llama-llama-guard-3-8b-v4-mkmlizer: Loading 0: 0%| | 0/291 [00:00<?, ?it/s] Loading 0: 2%|▏ | 5/291 [00:00<00:07, 36.83it/s] Loading 0: 5%|▍ | 14/291 [00:00<00:05, 49.06it/s] Loading 0: 8%|▊ | 23/291 [00:00<00:05, 49.49it/s] Loading 0: 11%|█ | 31/291 [00:00<00:04, 57.77it/s] Loading 0: 13%|█▎ | 38/291 [00:00<00:04, 57.70it/s] Loading 0: 15%|█▌ | 44/291 [00:00<00:04, 54.50it/s] Loading 0: 17%|█▋ | 50/291 [00:00<00:04, 48.46it/s] Loading 0: 20%|██ | 59/291 [00:01<00:04, 50.05it/s] Loading 0: 23%|██▎ | 68/291 [00:01<00:04, 51.04it/s] Loading 0: 26%|██▌ | 76/291 [00:01<00:03, 57.38it/s] Loading 0: 29%|██▊ | 83/291 [00:01<00:05, 38.41it/s] Loading 0: 30%|███ | 88/291 [00:01<00:05, 38.93it/s] Loading 0: 32%|███▏ | 94/291 [00:01<00:04, 41.84it/s] Loading 0: 34%|███▍ | 100/291 [00:02<00:04, 41.41it/s] Loading 0: 36%|███▌ | 105/291 [00:02<00:04, 42.79it/s] Loading 0: 38%|███▊ | 112/291 [00:02<00:03, 48.78it/s] Loading 0: 41%|████ | 118/291 [00:02<00:03, 46.61it/s] Loading 0: 42%|████▏ | 123/291 [00:02<00:03, 43.95it/s] Loading 0: 45%|████▍ | 130/291 [00:02<00:03, 49.77it/s] Loading 0: 47%|████▋ | 136/291 [00:02<00:03, 45.18it/s] Loading 0: 48%|████▊ | 141/291 [00:03<00:03, 45.67it/s] Loading 0: 51%|█████ | 148/291 [00:03<00:02, 51.27it/s] Loading 0: 53%|█████▎ | 154/291 [00:03<00:02, 47.48it/s] Loading 0: 55%|█████▍ | 159/291 [00:03<00:02, 47.81it/s] Loading 0: 57%|█████▋ | 166/291 [00:03<00:02, 53.57it/s] Loading 0: 59%|█████▉ | 172/291 [00:03<00:02, 51.72it/s] Loading 0: 62%|██████▏ | 179/291 [00:03<00:02, 55.05it/s] Loading 0: 64%|██████▎ | 185/291 [00:03<00:01, 55.99it/s] Loading 0: 66%|██████▌ | 191/291 [00:04<00:02, 33.71it/s] Loading 0: 67%|██████▋ | 196/291 [00:04<00:02, 35.53it/s] Loading 0: 69%|██████▉ | 202/291 [00:04<00:02, 39.68it/s] Loading 0: 71%|███████▏ | 208/291 [00:04<00:02, 39.56it/s] Loading 0: 73%|███████▎ | 213/291 [00:04<00:01, 39.75it/s] Loading 0: 76%|███████▌ | 220/291 [00:04<00:01, 46.10it/s] Loading 0: 78%|███████▊ | 226/291 [00:04<00:01, 
46.01it/s] Loading 0: 79%|███████▉ | 231/291 [00:04<00:01, 46.31it/s] Loading 0: 82%|████████▏ | 238/291 [00:05<00:01, 51.13it/s] Loading 0: 84%|████████▍ | 244/291 [00:05<00:01, 46.27it/s] Loading 0: 86%|████████▌ | 249/291 [00:05<00:00, 46.53it/s] Loading 0: 88%|████████▊ | 255/291 [00:05<00:00, 49.38it/s] Loading 0: 90%|████████▉ | 261/291 [00:05<00:00, 49.70it/s] Loading 0: 92%|█████████▏| 267/291 [00:05<00:00, 45.24it/s] Loading 0: 94%|█████████▍| 274/291 [00:05<00:00, 51.17it/s] Loading 0: 96%|█████████▌| 280/291 [00:05<00:00, 48.36it/s] Loading 0: 98%|█████████▊| 286/291 [00:06<00:00, 45.42it/s] Loading 0: 100%|██████████| 291/291 [00:11<00:00, 3.43it/s]
Job meta-llama-llama-guard-3-8b-v4-mkmlizer completed after 185.73s with status: succeeded
Stopping job with name meta-llama-llama-guard-3-8b-v4-mkmlizer
Pipeline stage MKMLizer completed in 187.19s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.45s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service meta-llama-llama-guard-3-8b-v4
Waiting for inference service meta-llama-llama-guard-3-8b-v4 to be ready
Inference service meta-llama-llama-guard-3-8b-v4 ready after 131.86314606666565s
Pipeline stage MKMLDeployer completed in 133.38s
run pipeline stage %s
Running pipeline stage StressChecker
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 2.7093188762664795s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 1.9086909294128418s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 0.9912419319152832s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 1.4037630558013916s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 1.602254867553711s
5 requests
0 failed requests
5th percentile: 1.0737461566925048
10th percentile: 1.1562503814697265
20th percentile: 1.32125883102417
30th percentile: 1.4434614181518555
40th percentile: 1.5228581428527832
50th percentile: 1.602254867553711
60th percentile: 1.7248292922973634
70th percentile: 1.8474037170410156
80th percentile: 2.0688165187835694
90th percentile: 2.3890676975250242
95th percentile: 2.549193286895752
99th percentile: 2.677293758392334
mean time: 1.7230539321899414
Pipeline stage StressChecker completed in 12.27s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 3.87s
Shutdown handler de-registered
meta-llama-llama-guard-3-8b_v4 status is now deployed due to DeploymentManager action
Shutdown handler registered
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Skipping teardown as no inference service was successfully deployed
Pipeline stage MKMLProfilerDeleter completed in 0.12s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage MKMLProfilerTemplater completed in 0.11s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service meta-llama-llama-guard-3-8b-v4-profiler
Waiting for inference service meta-llama-llama-guard-3-8b-v4-profiler to be ready
Inference service meta-llama-llama-guard-3-8b-v4-profiler ready after 150.3581998348236s
Pipeline stage MKMLProfilerDeployer completed in 150.70s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/src/inference_scripts tenant-chaiml-guanaco/meta-llama-llama-guad1da7cc49ea7a2b7a1ef36c2551930aa-deplo6crh9:/code/chaiverse_profiler_1725585608 --namespace tenant-chaiml-guanaco
kubectl exec -it meta-llama-llama-guad1da7cc49ea7a2b7a1ef36c2551930aa-deplo6crh9 --namespace tenant-chaiml-guanaco -- sh -c 'cd /code/chaiverse_profiler_1725585608 && python profiles.py profile --best_of_n 1 --auto_batch 5 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 2048 --output_tokens 64 --summary /code/chaiverse_profiler_1725585608/summary.json'
kubectl exec -it meta-llama-llama-guad1da7cc49ea7a2b7a1ef36c2551930aa-deplo6crh9 --namespace tenant-chaiml-guanaco -- bash -c 'cat /code/chaiverse_profiler_1725585608/summary.json'
Pipeline stage MKMLProfilerRunner completed in 899.74s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Checking if service meta-llama-llama-guard-3-8b-v4-profiler is running
Tearing down inference service meta-llama-llama-guard-3-8b-v4-profiler
Service meta-llama-llama-guard-3-8b-v4-profiler has been torn down
Pipeline stage MKMLProfilerDeleter completed in 1.68s
Shutdown handler de-registered
meta-llama-llama-guard-3-8b_v4 status is now inactive due to auto-deactivation (removal of underperforming models)

Usage Metrics

Latency Metrics