submission_id: jic062-nemo-v1-6_v1
developer_uid: chace9580
best_of: 8
celo_rating: 1253.8
display_name: jic062-nemo-v1-6_v1
family_friendly_score: 0.0
formatter: {'memory_template': '[INST]system\n{memory}[/INST]\n', 'prompt_template': '[INST]user\n{prompt}[/INST]\n', 'bot_template': '[INST]assistant\n{bot_name}: {message}[/INST]\n', 'user_template': '[INST]user\n{user_name}: {message}[/INST]\n', 'response_template': '[INST]assistant\n{bot_name}:', 'truncate_by_message': False}
generation_params: {'temperature': 0.85, 'top_p': 1.0, 'min_p': 0.05, 'top_k': 80, 'presence_penalty': 0.0, 'frequency_penalty': 0.0, 'stopping_words': ['\n', '[/INST]'], 'max_input_tokens': 512, 'best_of': 8, 'max_output_tokens': 64}
gpu_counts: {'NVIDIA RTX A5000': 1}
is_internal_developer: False
language_model: jic062/Nemo-v1.6
latencies: [{'batch_size': 1, 'throughput': 0.6942275376261743, 'latency_mean': 1.44036106467247, 'latency_p50': 1.429706335067749, 'latency_p90': 1.600719404220581}, {'batch_size': 3, 'throughput': 1.335752792078313, 'latency_mean': 2.244500153064728, 'latency_p50': 2.25449001789093, 'latency_p90': 2.5059308528900144}, {'batch_size': 5, 'throughput': 1.571657507291668, 'latency_mean': 3.1667578446865083, 'latency_p50': 3.1702964305877686, 'latency_p90': 3.555490827560425}, {'batch_size': 6, 'throughput': 1.6264190917649066, 'latency_mean': 3.663358424901962, 'latency_p50': 3.6945478916168213, 'latency_p90': 4.1116128921508786}, {'batch_size': 8, 'throughput': 1.61165006824876, 'latency_mean': 4.9327673864364625, 'latency_p50': 4.947274088859558, 'latency_p90': 5.512874293327331}, {'batch_size': 10, 'throughput': 1.5679882152057605, 'latency_mean': 6.340331894159317, 'latency_p50': 6.317824721336365, 'latency_p90': 7.2601547002792355}]
max_input_tokens: 512
max_output_tokens: 64
model_architecture: MistralForCausalLM
model_group: jic062/Nemo-v1.6
model_name: jic062-nemo-v1-6_v1
model_num_parameters: 12772070400.0
model_repo: jic062/Nemo-v1.6
model_size: 13B
num_battles: 17563
num_wins: 8865
ranking_group: single
status: torndown
submission_type: basic
throughput_3p7s: 1.64
timestamp: 2024-09-19T16:58:13+00:00
us_pacific_date: 2024-09-19
win_ratio: 0.5047543130444685
Resubmit model
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name jic062-nemo-v1-6-v1-mkmlizer
Waiting for job on jic062-nemo-v1-6-v1-mkmlizer to finish
jic062-nemo-v1-6-v1-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
jic062-nemo-v1-6-v1-mkmlizer: ║ _____ __ __ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
jic062-nemo-v1-6-v1-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
jic062-nemo-v1-6-v1-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ /___/ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ Version: 0.10.1 ║
jic062-nemo-v1-6-v1-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
jic062-nemo-v1-6-v1-mkmlizer: ║ https://mk1.ai ║
jic062-nemo-v1-6-v1-mkmlizer: ║ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ The license key for the current software has been verified as ║
jic062-nemo-v1-6-v1-mkmlizer: ║ belonging to: ║
jic062-nemo-v1-6-v1-mkmlizer: ║ ║
jic062-nemo-v1-6-v1-mkmlizer: ║ Chai Research Corp. ║
jic062-nemo-v1-6-v1-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
jic062-nemo-v1-6-v1-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
jic062-nemo-v1-6-v1-mkmlizer: ║ ║
jic062-nemo-v1-6-v1-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
jic062-nemo-v1-6-v1-mkmlizer: Downloaded to shared memory in 51.276s
jic062-nemo-v1-6-v1-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmp81e5y_oc, device:0
jic062-nemo-v1-6-v1-mkmlizer: Saving flywheel model at /dev/shm/model_cache
jic062-nemo-v1-6-v1-mkmlizer: quantized model in 34.882s
jic062-nemo-v1-6-v1-mkmlizer: Processed model jic062/Nemo-v1.6 in 86.158s
jic062-nemo-v1-6-v1-mkmlizer: creating bucket guanaco-mkml-models
jic062-nemo-v1-6-v1-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
jic062-nemo-v1-6-v1-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/jic062-nemo-v1-6-v1
jic062-nemo-v1-6-v1-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/jic062-nemo-v1-6-v1/config.json
jic062-nemo-v1-6-v1-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/jic062-nemo-v1-6-v1/special_tokens_map.json
jic062-nemo-v1-6-v1-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/jic062-nemo-v1-6-v1/tokenizer_config.json
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
jic062-nemo-v1-6-v1-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/jic062-nemo-v1-6-v1/flywheel_model.0.safetensors
jic062-nemo-v1-6-v1-mkmlizer: Loading 0: 0%| | 0/363 [00:00<?, ?it/s] Loading 0: 1%|▏ | 5/363 [00:00<00:09, 36.97it/s] Loading 0: 4%|▎ | 13/363 [00:00<00:05, 58.63it/s] Loading 0: 6%|▌ | 20/363 [00:00<00:06, 55.03it/s] Loading 0: 7%|▋ | 26/363 [00:00<00:06, 51.46it/s] Loading 0: 9%|▉ | 32/363 [00:00<00:07, 43.92it/s] Loading 0: 11%|█ | 40/363 [00:00<00:06, 50.93it/s] Loading 0: 13%|█▎ | 46/363 [00:00<00:06, 48.27it/s] Loading 0: 14%|█▍ | 51/363 [00:01<00:06, 46.98it/s] Loading 0: 16%|█▋ | 59/363 [00:01<00:05, 54.85it/s] Loading 0: 18%|█▊ | 65/363 [00:01<00:08, 35.11it/s] Loading 0: 20%|█▉ | 72/363 [00:01<00:07, 40.91it/s] Loading 0: 21%|██▏ | 78/363 [00:01<00:07, 40.07it/s] Loading 0: 23%|██▎ | 83/363 [00:01<00:06, 40.49it/s] Loading 0: 25%|██▍ | 90/363 [00:01<00:05, 45.83it/s] Loading 0: 26%|██▋ | 96/363 [00:02<00:06, 44.11it/s] Loading 0: 28%|██▊ | 101/363 [00:02<00:06, 42.90it/s] Loading 0: 30%|███ | 109/363 [00:02<00:04, 51.23it/s] Loading 0: 32%|███▏ | 115/363 [00:02<00:05, 46.98it/s] Loading 0: 33%|███▎ | 120/363 [00:02<00:05, 45.14it/s] Loading 0: 35%|███▍ | 126/363 [00:02<00:05, 47.12it/s] Loading 0: 36%|███▋ | 132/363 [00:02<00:05, 44.14it/s] Loading 0: 38%|███▊ | 137/363 [00:03<00:05, 43.18it/s] Loading 0: 39%|███▉ | 142/363 [00:03<00:06, 35.33it/s] Loading 0: 40%|████ | 146/363 [00:03<00:06, 35.86it/s] Loading 0: 41%|████▏ | 150/363 [00:03<00:06, 34.94it/s] Loading 0: 43%|████▎ | 156/363 [00:03<00:05, 40.69it/s] Loading 0: 44%|████▍ | 161/363 [00:03<00:04, 42.33it/s] Loading 0: 46%|████▌ | 166/363 [00:03<00:04, 43.84it/s] Loading 0: 47%|████▋ | 172/363 [00:03<00:04, 41.91it/s] Loading 0: 49%|████▉ | 177/363 [00:04<00:04, 41.99it/s] Loading 0: 51%|█████ | 184/363 [00:04<00:03, 47.38it/s] Loading 0: 52%|█████▏ | 190/363 [00:04<00:03, 45.33it/s] Loading 0: 54%|█████▎ | 195/363 [00:04<00:03, 43.31it/s] Loading 0: 56%|█████▌ | 202/363 [00:04<00:03, 47.87it/s] Loading 0: 57%|█████▋ | 208/363 [00:04<00:03, 44.27it/s] Loading 0: 59%|█████▊ | 213/363 [00:04<00:03, 43.28it/s] Loading 0: 60%|██████ | 218/363 [00:04<00:03, 43.89it/s] Loading 0: 61%|██████▏ | 223/363 [00:05<00:04, 34.32it/s] Loading 0: 63%|██████▎ | 227/363 [00:05<00:03, 35.33it/s] Loading 0: 64%|██████▎ | 231/363 [00:05<00:03, 34.84it/s] Loading 0: 65%|██████▌ | 237/363 [00:05<00:03, 40.74it/s] Loading 0: 67%|██████▋ | 242/363 [00:05<00:02, 40.78it/s] Loading 0: 68%|██████▊ | 247/363 [00:05<00:02, 42.31it/s] Loading 0: 70%|██████▉ | 253/363 [00:05<00:02, 41.86it/s] Loading 0: 71%|███████ | 258/363 [00:06<00:02, 41.17it/s] Loading 0: 73%|███████▎ | 264/363 [00:06<00:02, 44.81it/s] Loading 0: 74%|███████▍ | 269/363 [00:06<00:02, 45.24it/s] Loading 0: 75%|███████▌ | 274/363 [00:06<00:01, 45.83it/s] Loading 0: 77%|███████▋ | 280/363 [00:06<00:01, 44.20it/s] Loading 0: 79%|███████▊ | 285/363 [00:06<00:01, 42.09it/s] Loading 0: 80%|████████ | 291/363 [00:06<00:01, 46.61it/s] Loading 0: 82%|████████▏ | 296/363 [00:06<00:01, 46.96it/s] Loading 0: 83%|████████▎ | 301/363 [00:06<00:01, 46.67it/s] Loading 0: 84%|████████▍ | 306/363 [00:13<00:22, 2.48it/s] Loading 0: 85%|████████▌ | 310/363 [00:13<00:16, 3.23it/s] Loading 0: 87%|████████▋ | 316/363 [00:13<00:09, 4.81it/s] Loading 0: 88%|████████▊ | 321/363 [00:14<00:06, 6.36it/s] Loading 0: 91%|█████████ | 329/363 [00:14<00:03, 10.07it/s] Loading 0: 92%|█████████▏| 335/363 [00:14<00:02, 13.06it/s] Loading 0: 94%|█████████▎| 340/363 [00:14<00:01, 16.08it/s] Loading 0: 96%|█████████▌| 347/363 [00:14<00:00, 21.81it/s] Loading 0: 97%|█████████▋| 353/363 [00:14<00:00, 25.19it/s] Loading 0: 99%|█████████▊| 358/363 [00:14<00:00, 28.49it/s]
Job jic062-nemo-v1-6-v1-mkmlizer completed after 106.7s with status: succeeded
Stopping job with name jic062-nemo-v1-6-v1-mkmlizer
Pipeline stage MKMLizer completed in 108.41s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.12s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service jic062-nemo-v1-6-v1
Waiting for inference service jic062-nemo-v1-6-v1 to be ready
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Failed to get response for submission mistralai-mixtral-8x7b_3473_v136: ('http://mistralai-mixtral-8x7b-3473-v136-predictor.tenant-chaiml-guanaco.k.chaiverse.com/v1/models/GPT-J-6B-lit-v2:predict', 'read tcp 127.0.0.1:37888->127.0.0.1:8080: read: connection reset by peer\n')
Inference service jic062-nemo-v1-6-v1 ready after 201.56679797172546s
Pipeline stage MKMLDeployer completed in 202.06s
run pipeline stage %s
Running pipeline stage StressChecker
Received healthy response to inference request in 2.4047510623931885s
Received healthy response to inference request in 2.2273752689361572s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Connection pool is full, discarding connection: %s. Connection pool size: %s
Received healthy response to inference request in 1.660611629486084s
Received healthy response to inference request in 1.9243860244750977s
Received healthy response to inference request in 1.7220377922058105s
5 requests
0 failed requests
5th percentile: 1.6728968620300293
10th percentile: 1.6851820945739746
20th percentile: 1.7097525596618652
30th percentile: 1.762507438659668
40th percentile: 1.8434467315673828
50th percentile: 1.9243860244750977
60th percentile: 2.0455817222595214
70th percentile: 2.166777420043945
80th percentile: 2.2628504276275634
90th percentile: 2.333800745010376
95th percentile: 2.3692759037017823
99th percentile: 2.397656030654907
mean time: 1.9878323554992676
Pipeline stage StressChecker completed in 10.96s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 10.51s
Shutdown handler de-registered
jic062-nemo-v1-6_v1 status is now deployed due to DeploymentManager action
Shutdown handler registered
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Skipping teardown as no inference service was successfully deployed
Pipeline stage MKMLProfilerDeleter completed in 0.12s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage MKMLProfilerTemplater completed in 0.10s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service jic062-nemo-v1-6-v1-profiler
Waiting for inference service jic062-nemo-v1-6-v1-profiler to be ready
Inference service jic062-nemo-v1-6-v1-profiler ready after 200.4276213645935s
Pipeline stage MKMLProfilerDeployer completed in 202.51s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/src/inference_scripts tenant-chaiml-guanaco/jic062-nemo-v1-6-v1-profiler-predictor-00001-deployment-ff5x54g:/code/chaiverse_profiler_1726765670 --namespace tenant-chaiml-guanaco
kubectl exec -it jic062-nemo-v1-6-v1-profiler-predictor-00001-deployment-ff5x54g --namespace tenant-chaiml-guanaco -- sh -c 'cd /code/chaiverse_profiler_1726765670 && python profiles.py profile --best_of_n 8 --auto_batch 5 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 512 --output_tokens 64 --summary /code/chaiverse_profiler_1726765670/summary.json'
kubectl exec -it jic062-nemo-v1-6-v1-profiler-predictor-00001-deployment-ff5x54g --namespace tenant-chaiml-guanaco -- bash -c 'cat /code/chaiverse_profiler_1726765670/summary.json'
Pipeline stage MKMLProfilerRunner completed in 945.92s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Checking if service jic062-nemo-v1-6-v1-profiler is running
Tearing down inference service jic062-nemo-v1-6-v1-profiler
Service jic062-nemo-v1-6-v1-profiler has been torndown
Pipeline stage MKMLProfilerDeleter completed in 2.05s
Shutdown handler de-registered
jic062-nemo-v1-6_v1 status is now inactive due to auto deactivation removed underperforming models
jic062-nemo-v1-6_v1 status is now torndown due to DeploymentManager action