submission_id: locutusque-apollo-2-0-ll_3599_v3
developer_uid: Locutusque
best_of: 6
celo_rating: 1220.59
display_name: locutusque-apollo-2-0-ll_3599_v3
family_friendly_score: 0.0
formatter: {'memory_template': '<|im_start|>system\n{memory}<|im_end|>\n', 'prompt_template': '<|im_start|>user\n{prompt}<|im_end|>\n', 'bot_template': '<|im_start|>assistant\n{bot_name}: {message}<|im_end|>\n', 'user_template': '<|im_start|>user\n{user_name}: {message}<|im_end|>\n', 'response_template': '<|im_start|>assistant\n{bot_name}:', 'truncate_by_message': False}
generation_params: {'temperature': 1.3, 'top_p': 0.95, 'min_p': 0.05, 'top_k': 40, 'presence_penalty': 0.05, 'frequency_penalty': 0.0, 'stopping_words': ['<|im_end|>', '<|im_start|>'], 'max_input_tokens': 1024, 'best_of': 6, 'max_output_tokens': 64}
gpu_counts: {'NVIDIA RTX A5000': 1}
is_internal_developer: False
language_model: Locutusque/Apollo-2.0-Llama-3.1-8B
latencies: [{'batch_size': 1, 'throughput': 0.8768344664876164, 'latency_mean': 1.1403699266910552, 'latency_p50': 1.139601469039917, 'latency_p90': 1.2567686796188355}, {'batch_size': 4, 'throughput': 1.9373762064645115, 'latency_mean': 2.052054258584976, 'latency_p50': 2.058983564376831, 'latency_p90': 2.2753950357437134}, {'batch_size': 5, 'throughput': 2.107507136207165, 'latency_mean': 2.3647428011894225, 'latency_p50': 2.390956401824951, 'latency_p90': 2.640138602256775}, {'batch_size': 8, 'throughput': 2.354496448754391, 'latency_mean': 3.3607704055309298, 'latency_p50': 3.3848540782928467, 'latency_p90': 3.8428706645965574}, {'batch_size': 10, 'throughput': 2.439102719148581, 'latency_mean': 4.069735119342804, 'latency_p50': 4.051482200622559, 'latency_p90': 4.539907670021057}, {'batch_size': 12, 'throughput': 2.4738708807078105, 'latency_mean': 4.8090136551857, 'latency_p50': 4.821054697036743, 'latency_p90': 5.435518860816956}, {'batch_size': 15, 'throughput': 2.507403635271489, 'latency_mean': 5.9224970233440395, 'latency_p50': 5.895723342895508, 'latency_p90': 6.680196070671081}]
max_input_tokens: 1024
max_output_tokens: 64
model_architecture: LlamaForCausalLM
model_group: Locutusque/Apollo-2.0-Ll
model_name: locutusque-apollo-2-0-ll_3599_v3
model_num_parameters: 8030261248.0
model_repo: Locutusque/Apollo-2.0-Llama-3.1-8B
model_size: 8B
num_battles: 12671
num_wins: 5887
ranking_group: single
status: torndown
submission_type: basic
throughput_3p7s: 2.42
timestamp: 2024-09-09T04:12:18+00:00
us_pacific_date: 2024-09-08
win_ratio: 0.46460421434772314
Download Preference Data
Resubmit model
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name locutusque-apollo-2-0-ll-3599-v3-mkmlizer
Waiting for job on locutusque-apollo-2-0-ll-3599-v3-mkmlizer to finish
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ _____ __ __ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ /___/ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ Version: 0.10.1 ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ https://mk1.ai ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ The license key for the current software has been verified as ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ belonging to: ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ Chai Research Corp. ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ║ ║
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Retrying (%r) after connection broken by '%r': %s
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: Downloaded to shared memory in 98.789s
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmpujo4u5p_, device:0
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: Saving flywheel model at /dev/shm/model_cache
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: quantized model in 26.982s
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: Processed model Locutusque/Apollo-2.0-Llama-3.1-8B in 125.771s
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: creating bucket guanaco-mkml-models
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3/config.json
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3/special_tokens_map.json
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3/tokenizer_config.json
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3/tokenizer.json
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/locutusque-apollo-2-0-ll-3599-v3/flywheel_model.0.safetensors
locutusque-apollo-2-0-ll-3599-v3-mkmlizer: Loading 0: 0%| | 0/291 [00:00<?, ?it/s] Loading 0: 2%|▏ | 5/291 [00:00<00:08, 35.25it/s] Loading 0: 5%|▍ | 14/291 [00:00<00:05, 47.23it/s] Loading 0: 8%|▊ | 22/291 [00:00<00:05, 49.82it/s] Loading 0: 9%|▉ | 27/291 [00:00<00:07, 37.50it/s] Loading 0: 11%|█ | 32/291 [00:00<00:06, 38.28it/s] Loading 0: 14%|█▎ | 40/291 [00:00<00:05, 48.32it/s] Loading 0: 16%|█▌ | 46/291 [00:01<00:05, 46.64it/s] Loading 0: 18%|█▊ | 51/291 [00:01<00:05, 44.66it/s] Loading 0: 20%|██ | 59/291 [00:01<00:04, 53.44it/s] Loading 0: 22%|██▏ | 65/291 [00:01<00:05, 39.25it/s] Loading 0: 25%|██▍ | 72/291 [00:01<00:05, 40.96it/s] Loading 0: 26%|██▋ | 77/291 [00:01<00:05, 41.02it/s] Loading 0: 29%|██▉ | 85/291 [00:01<00:04, 48.37it/s] Loading 0: 31%|███▏ | 91/291 [00:02<00:04, 44.79it/s] Loading 0: 33%|███▎ | 96/291 [00:02<00:04, 45.02it/s] Loading 0: 35%|███▍ | 101/291 [00:02<00:04, 38.38it/s] Loading 0: 36%|███▋ | 106/291 [00:02<00:04, 38.78it/s] Loading 0: 38%|███▊ | 112/291 [00:02<00:04, 43.22it/s] Loading 0: 40%|████ | 117/291 [00:02<00:03, 44.32it/s] Loading 0: 42%|████▏ | 122/291 [00:02<00:04, 37.82it/s] Loading 0: 45%|████▍ | 130/291 [00:02<00:03, 46.88it/s] Loading 0: 47%|████▋ | 136/291 [00:03<00:03, 45.15it/s] Loading 0: 49%|████▉ | 142/291 [00:03<00:03, 40.96it/s] Loading 0: 51%|█████ | 147/291 [00:03<00:03, 40.51it/s] Loading 0: 52%|█████▏ | 152/291 [00:03<00:03, 41.74it/s] Loading 0: 54%|█████▍ | 157/291 [00:03<00:03, 41.00it/s] Loading 0: 56%|█████▌ | 162/291 [00:03<00:03, 42.57it/s] Loading 0: 57%|█████▋ | 167/291 [00:03<00:03, 37.21it/s] Loading 0: 60%|██████ | 175/291 [00:04<00:02, 46.36it/s] Loading 0: 62%|██████▏ | 181/291 [00:04<00:02, 43.18it/s] Loading 0: 64%|██████▍ | 186/291 [00:04<00:03, 33.98it/s] Loading 0: 66%|██████▌ | 192/291 [00:04<00:02, 39.18it/s] Loading 0: 68%|██████▊ | 197/291 [00:04<00:02, 39.42it/s] Loading 0: 69%|██████▉ | 202/291 [00:04<00:02, 39.28it/s] Loading 0: 71%|███████ | 207/291 [00:04<00:02, 41.19it/s] 
Loading 0: 73%|███████▎ | 212/291 [00:05<00:02, 34.53it/s] Loading 0: 76%|███████▌ | 221/291 [00:05<00:01, 46.57it/s] Loading 0: 78%|███████▊ | 227/291 [00:05<00:01, 37.33it/s] Loading 0: 80%|███████▉ | 232/291 [00:05<00:01, 38.94it/s] Loading 0: 82%|████████▏ | 238/291 [00:05<00:01, 43.29it/s] Loading 0: 84%|████████▍ | 244/291 [00:05<00:01, 42.45it/s] Loading 0: 86%|████████▌ | 249/291 [00:05<00:01, 41.97it/s] Loading 0: 88%|████████▊ | 255/291 [00:06<00:00, 45.18it/s] Loading 0: 89%|████████▉ | 260/291 [00:06<00:00, 43.30it/s] Loading 0: 91%|█████████ | 265/291 [00:06<00:00, 36.33it/s] Loading 0: 92%|█████████▏| 269/291 [00:06<00:00, 37.09it/s] Loading 0: 94%|█████████▍| 274/291 [00:06<00:00, 39.56it/s] Loading 0: 96%|█████████▌| 280/291 [00:06<00:00, 40.33it/s] Loading 0: 98%|█████████▊| 285/291 [00:06<00:00, 39.85it/s] Loading 0: 100%|██████████| 291/291 [00:12<00:00, 3.25it/s]
Job locutusque-apollo-2-0-ll-3599-v3-mkmlizer completed after 145.96s with status: succeeded
Stopping job with name locutusque-apollo-2-0-ll-3599-v3-mkmlizer
Pipeline stage MKMLizer completed in 147.03s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.10s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service locutusque-apollo-2-0-ll-3599-v3
Waiting for inference service locutusque-apollo-2-0-ll-3599-v3 to be ready
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Inference service locutusque-apollo-2-0-ll-3599-v3 ready after 150.72222208976746s
Pipeline stage MKMLDeployer completed in 151.05s
run pipeline stage %s
Running pipeline stage StressChecker
Received healthy response to inference request in 3.1201980113983154s
Received healthy response to inference request in 2.2357664108276367s
Received healthy response to inference request in 2.6443872451782227s
Received healthy response to inference request in 1.7576065063476562s
Received healthy response to inference request in 2.0408406257629395s
5 requests
0 failed requests
5th percentile: 1.814253330230713
10th percentile: 1.8709001541137695
20th percentile: 1.9841938018798828
30th percentile: 2.079825782775879
40th percentile: 2.157796096801758
50th percentile: 2.2357664108276367
60th percentile: 2.399214744567871
70th percentile: 2.5626630783081055
80th percentile: 2.739549398422241
90th percentile: 2.9298737049102783
95th percentile: 3.025035858154297
99th percentile: 3.1011655807495115
mean time: 2.3597597599029543
Pipeline stage StressChecker completed in 12.42s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 6.08s
Shutdown handler de-registered
locutusque-apollo-2-0-ll_3599_v3 status is now deployed due to DeploymentManager action
Shutdown handler registered
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Skipping teardown as no inference service was successfully deployed
Pipeline stage MKMLProfilerDeleter completed in 0.11s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage MKMLProfilerTemplater completed in 0.10s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service locutusque-apollo-2-0-ll-3599-v3-profiler
Waiting for inference service locutusque-apollo-2-0-ll-3599-v3-profiler to be ready
Inference service locutusque-apollo-2-0-ll-3599-v3-profiler ready after 150.35668659210205s
Pipeline stage MKMLProfilerDeployer completed in 150.68s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/src/inference_scripts tenant-chaiml-guanaco/locutusque-apollo-2-54643514292f96fa6b9b381c51910489-deplobz2jv:/code/chaiverse_profiler_1725855642 --namespace tenant-chaiml-guanaco
kubectl exec -it locutusque-apollo-2-54643514292f96fa6b9b381c51910489-deplobz2jv --namespace tenant-chaiml-guanaco -- sh -c 'cd /code/chaiverse_profiler_1725855642 && python profiles.py profile --best_of_n 6 --auto_batch 5 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 1024 --output_tokens 64 --summary /code/chaiverse_profiler_1725855642/summary.json'
kubectl exec -it locutusque-apollo-2-54643514292f96fa6b9b381c51910489-deplobz2jv --namespace tenant-chaiml-guanaco -- bash -c 'cat /code/chaiverse_profiler_1725855642/summary.json'
Pipeline stage MKMLProfilerRunner completed in 760.97s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Checking if service locutusque-apollo-2-0-ll-3599-v3-profiler is running
Tearing down inference service locutusque-apollo-2-0-ll-3599-v3-profiler
Service locutusque-apollo-2-0-ll-3599-v3-profiler has been torndown
Pipeline stage MKMLProfilerDeleter completed in 1.65s
Shutdown handler de-registered
locutusque-apollo-2-0-ll_3599_v3 status is now inactive due to auto deactivation (removal of underperforming models)
locutusque-apollo-2-0-ll_3599_v3 status is now torndown due to DeploymentManager action