submission_id: zonemercy-vingt-deux-gfv_3432_v6
developer_uid: chai_backend_admin
best_of: 8
celo_rating: 1238.3
display_name: temp-4
family_friendly_score: 0.5664388762338648
family_friendly_standard_error: 0.009620775915468713
formatter: {'memory_template': '', 'prompt_template': '', 'bot_template': '{bot_name}: {message}\n', 'user_template': '{user_name}: {message}\n', 'response_template': '{bot_name}:', 'truncate_by_message': False}
generation_params: {'temperature': 0.9, 'top_p': 1.0, 'min_p': 0.05, 'top_k': 80, 'presence_penalty': 0.0, 'frequency_penalty': 0.0, 'stopping_words': ['\n', '</s>', '####', 'Bot:', 'User:', 'You:', '<|im_end|>', '<|eot_id|>'], 'max_input_tokens': 1024, 'best_of': 8, 'max_output_tokens': 64}
gpu_counts: {'NVIDIA RTX A6000': 1}
ineligible_reason: num_battles<5000
is_internal_developer: True
language_model: zonemercy/Vingt-Deux-gfv1v2ep2
latencies: [{'batch_size': 1, 'throughput': 0.38212904921357327, 'latency_mean': 2.6168572652339934, 'latency_p50': 2.6220133304595947, 'latency_p90': 2.8818942070007325}, {'batch_size': 2, 'throughput': 0.6078460645330811, 'latency_mean': 3.2826189386844633, 'latency_p50': 3.2625657320022583, 'latency_p90': 3.646310329437256}, {'batch_size': 3, 'throughput': 0.7604025383330383, 'latency_mean': 3.9353084683418276, 'latency_p50': 3.9406440258026123, 'latency_p90': 4.349087405204773}, {'batch_size': 4, 'throughput': 0.8891757251476525, 'latency_mean': 4.48997691988945, 'latency_p50': 4.4786518812179565, 'latency_p90': 4.972111248970032}, {'batch_size': 5, 'throughput': 0.9803745026561128, 'latency_mean': 5.078818688392639, 'latency_p50': 5.067575097084045, 'latency_p90': 5.675174903869629}]
max_input_tokens: 1024
max_output_tokens: 64
model_architecture: MistralForCausalLM
model_group: zonemercy/Vingt-Deux-gfv
model_name: temp-4
model_num_parameters: 22247282688.0
model_repo: zonemercy/Vingt-Deux-gfv1v2ep2
model_size: 22B
num_battles: 2711
num_wins: 1321
ranking_group: single
status: torndown
submission_type: basic
throughput_3p7s: 0.71
timestamp: 2024-09-26T08:20:32+00:00
us_pacific_date: 2024-09-26
win_ratio: 0.4872740686093692
Download Preference Data
Resubmit model
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name zonemercy-vingt-deux-gfv-3432-v6-mkmlizer
Waiting for job on zonemercy-vingt-deux-gfv-3432-v6-mkmlizer to finish
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ _____ __ __ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ /___/ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ Version: 0.11.12 ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ https://mk1.ai ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ The license key for the current software has been verified as ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ belonging to: ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ Chai Research Corp. ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ║ ║
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ _____ __ __ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ /___/ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ Version: 0.11.12 ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ https://mk1.ai ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ The license key for the current software has been verified as ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ belonging to: ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ Chai Research Corp. ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ║ ║
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: Downloaded to shared memory in 86.284s
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmpi_hd9fhd, device:0
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: Saving flywheel model at /dev/shm/model_cache
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: quantized model in 47.436s
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: Processed model zonemercy/Vingt-Deux-v2-1e5 in 133.720s
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: creating bucket guanaco-mkml-models
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/special_tokens_map.json
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/config.json
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/tokenizer_config.json
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/tokenizer.json
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/flywheel_model.1.safetensors s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/flywheel_model.1.safetensors
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/zonemercy-vingt-deux-v2-1e5-v23/flywheel_model.0.safetensors
zonemercy-vingt-deux-v2-1e5-v23-mkmlizer: Loading 0: 0%| | 0/507 [00:00<?, ?it/s] Loading 0: 1%| | 5/507 [00:00<00:19, 25.17it/s] Loading 0: 2%|▏ | 10/507 [00:00<00:15, 32.79it/s] Loading 0: 3%|▎ | 14/507 [00:00<00:17, 28.34it/s] Loading 0: 4%|▎ | 19/507 [00:00<00:14, 33.49it/s] Loading 0: 5%|▍ | 23/507 [00:00<00:16, 29.40it/s] Loading 0: 6%|▌ | 28/507 [00:00<00:14, 33.69it/s] Loading 0: 6%|▋ | 32/507 [00:01<00:15, 29.83it/s] Loading 0: 7%|▋ | 37/507 [00:01<00:14, 33.11it/s] Loading 0: 8%|▊ | 41/507 [00:01<00:15, 29.21it/s] Loading 0: 9%|▉ | 46/507 [00:01<00:13, 33.90it/s] Loading 0: 10%|▉ | 50/507 [00:01<00:14, 31.04it/s] Loading 0: 11%|█ | 54/507 [00:01<00:20, 22.04it/s] Loading 0: 11%|█ | 57/507 [00:02<00:20, 21.81it/s] Loading 0: 12%|█▏ | 61/507 [00:02<00:17, 24.90it/s] Loading 0: 13%|█▎ | 65/507 [00:02<00:18, 23.94it/s] Loading 0: 14%|█▍ | 70/507 [00:02<00:15, 28.71it/s] Loading 0: 15%|█▍ | 75/507 [00:02<00:13, 31.61it/s] Loading 0: 16%|█▌ | 80/507 [00:02<00:13, 30.64it/s] Loading 0: 17%|█▋ | 85/507 [00:02<00:12, 33.89it/s] Loading 0: 18%|█▊ | 89/507 [00:03<00:13, 29.91it/s] Loading 0: 19%|█▊ | 94/507 [00:03<00:12, 33.65it/s] Loading 0: 19%|█▉ | 98/507 [00:03<00:13, 29.45it/s] Loading 0: 20%|██ | 103/507 [00:03<00:12, 32.91it/s] Loading 0: 21%|██ | 107/507 [00:03<00:13, 29.29it/s] Loading 0: 22%|██▏ | 112/507 [00:03<00:12, 32.62it/s] Loading 0: 23%|██▎ | 116/507 [00:04<00:18, 21.18it/s] Loading 0: 24%|██▍ | 122/507 [00:04<00:15, 24.09it/s] Loading 0: 25%|██▌ | 127/507 [00:04<00:13, 27.92it/s] Loading 0: 26%|██▌ | 131/507 [00:04<00:14, 26.28it/s] Loading 0: 27%|██▋ | 136/507 [00:04<00:12, 30.21it/s] Loading 0: 28%|██▊ | 140/507 [00:04<00:13, 27.78it/s] Loading 0: 29%|██▊ | 145/507 [00:05<00:11, 31.63it/s] Loading 0: 29%|██▉ | 149/507 [00:05<00:12, 29.21it/s] Loading 0: 30%|███ | 154/507 [00:05<00:10, 32.92it/s] Loading 0: 31%|███ | 158/507 [00:05<00:11, 29.73it/s] Loading 0: 32%|███▏ | 163/507 [00:05<00:10, 33.99it/s] Loading 0: 33%|███▎ | 168/507 [00:05<00:09, 34.65it/s] Loading 0: 34%|███▍ | 172/507 [00:06<00:14, 22.96it/s] Loading 0: 35%|███▍ | 176/507 [00:06<00:14, 22.93it/s] Loading 0: 36%|███▌ | 181/507 [00:06<00:11, 27.29it/s] Loading 0: 36%|███▋ | 185/507 [00:06<00:12, 26.06it/s] Loading 0: 37%|███▋ | 190/507 [00:06<00:10, 30.33it/s] Loading 0: 38%|███▊ | 194/507 [00:06<00:11, 27.73it/s] Loading 0: 39%|███▉ | 199/507 [00:06<00:09, 31.70it/s] Loading 0: 40%|████ | 203/507 [00:07<00:10, 29.00it/s] Loading 0: 41%|████▏ | 210/507 [00:07<00:08, 35.78it/s] Loading 0: 42%|████▏ | 214/507 [00:07<00:08, 33.97it/s] Loading 0: 43%|████▎ | 218/507 [00:07<00:08, 33.30it/s] Loading 0: 44%|████▍ | 222/507 [00:07<00:08, 31.91it/s] Loading 0: 45%|████▍ | 226/507 [00:07<00:12, 23.24it/s] Loading 0: 45%|████▌ | 230/507 [00:08<00:11, 23.27it/s] Loading 0: 46%|████▋ | 235/507 [00:08<00:09, 27.97it/s] Loading 0: 47%|████▋ | 239/507 [00:08<00:10, 26.32it/s] Loading 0: 48%|████▊ | 244/507 [00:08<00:08, 30.68it/s] Loading 0: 49%|████▉ | 248/507 [00:08<00:09, 28.35it/s] Loading 0: 50%|████▉ | 253/507 [00:08<00:07, 32.78it/s] Loading 0: 51%|█████ | 257/507 [00:08<00:08, 29.43it/s] Loading 0: 52%|█████▏ | 262/507 [00:08<00:07, 33.29it/s] Loading 0: 52%|█████▏ | 266/507 [00:09<00:07, 30.34it/s] Loading 0: 53%|█████▎ | 271/507 [00:09<00:06, 34.54it/s] Loading 0: 54%|█████▍ | 275/507 [00:09<00:07, 30.59it/s] Loading 0: 55%|█████▌ | 280/507 [00:09<00:06, 34.61it/s] Loading 0: 56%|█████▌ | 284/507 [00:09<00:08, 25.94it/s] Loading 0: 57%|█████▋ | 288/507 [00:09<00:08, 25.72it/s] Loading 0: 58%|█████▊ | 293/507 [00:10<00:08, 26.41it/s] Loading 0: 59%|█████▉ | 298/507 [00:10<00:06, 30.84it/s] Loading 0: 59%|█████▉ | 299/507 [00:25<00:06, 30.84it/s] Loading 0: 59%|█████▉ | 300/507 [00:25<04:09, 1.20s/it] Loading 0: 60%|█████▉ | 302/507 [00:25<03:24, 1.00it/s] Loading 0: 61%|██████ | 307/507 [00:25<02:03, 1.62it/s] Loading 0: 61%|██████ | 310/507 [00:25<01:33, 2.11it/s] Loading 0: 62%|██████▏ | 313/507 [00:25<01:09, 2.78it/s] Loading 0: 62%|██████▏ | 316/507 [00:26<00:51, 3.70it/s] Loading 0: 63%|██████▎ | 320/507 [00:26<00:35, 5.22it/s] Loading 0: 64%|██████▍ | 327/507 [00:26<00:20, 8.97it/s] Loading 0: 65%|██████▌ | 331/507 [00:26<00:15, 11.00it/s] Loading 0: 66%|██████▌ | 335/507 [00:26<00:12, 13.38it/s] Loading 0: 67%|██████▋ | 339/507 [00:26<00:10, 16.39it/s] Loading 0: 68%|██████▊ | 343/507 [00:27<00:11, 14.88it/s] Loading 0: 68%|██████▊ | 347/507 [00:27<00:09, 16.41it/s] Loading 0: 69%|██████▉ | 352/507 [00:27<00:07, 20.98it/s] Loading 0: 70%|███████ | 356/507 [00:27<00:06, 21.68it/s] Loading 0: 71%|███████ | 361/507 [00:27<00:05, 26.40it/s] Loading 0: 72%|███████▏ | 365/507 [00:27<00:05, 25.35it/s] Loading 0: 73%|███████▎ | 370/507 [00:27<00:04, 29.76it/s] Loading 0: 74%|███████▍ | 374/507 [00:28<00:04, 27.66it/s] Loading 0: 75%|███████▍ | 379/507 [00:28<00:03, 32.27it/s] Loading 0: 76%|███████▌ | 383/507 [00:28<00:04, 29.81it/s] Loading 0: 77%|███████▋ | 389/507 [00:28<00:03, 34.94it/s] Loading 0: 78%|███████▊ | 393/507 [00:28<00:03, 33.14it/s] Loading 0: 78%|███████▊ | 397/507 [00:28<00:04, 24.21it/s] Loading 0: 79%|███████▉ | 401/507 [00:29<00:04, 24.37it/s] Loading 0: 80%|████████ | 406/507 [00:29<00:03, 29.26it/s] Loading 0: 81%|████████ | 410/507 [00:29<00:03, 26.89it/s] Loading 0: 82%|████████▏ | 415/507 [00:29<00:02, 31.39it/s] Loading 0: 83%|████████▎ | 419/507 [00:29<00:03, 28.72it/s] Loading 0: 84%|████████▎ | 424/507 [00:29<00:02, 32.97it/s] Loading 0: 84%|████████▍ | 428/507 [00:29<00:02, 29.52it/s] Loading 0: 85%|████████▌ | 433/507 [00:30<00:02, 32.53it/s] Loading 0: 86%|████████▌ | 437/507 [00:30<00:02, 29.36it/s] Loading 0: 87%|████████▋ | 442/507 [00:30<00:01, 33.78it/s] Loading 0: 88%|████████▊ | 446/507 [00:30<00:02, 30.14it/s] Loading 0: 89%|████████▉ | 451/507 [00:30<00:01, 34.54it/s] Loading 0: 90%|████████▉ | 455/507 [00:32<00:09, 5.62it/s] Loading 0: 91%|█████████ | 459/507 [00:33<00:06, 7.16it/s] Loading 0: 92%|█████████▏| 465/507 [00:33<00:04, 10.12it/s] Loading 0: 93%|█████████▎| 470/507 [00:33<00:02, 13.38it/s] Loading 0: 93%|█████████▎| 474/507 [00:33<00:02, 14.99it/s] Loading 0: 94%|█████████▍| 479/507 [00:33<00:01, 19.04it/s] Loading 0: 95%|█████████▌| 483/507 [00:33<00:01, 20.04it/s] Loading 0: 96%|█████████▋| 488/507 [00:33<00:00, 24.52it/s] Loading 0: 97%|█████████▋| 492/507 [00:34<00:00, 24.17it/s] Loading 0: 98%|█████████▊| 497/507 [00:34<00:00, 28.62it/s] Loading 0: 99%|█████████▉| 501/507 [00:34<00:00, 27.27it/s]
Job zonemercy-vingt-deux-v2-1e5-v23-mkmlizer completed after 174.18s with status: succeeded
Stopping job with name zonemercy-vingt-deux-v2-1e5-v23-mkmlizer
Pipeline stage MKMLizer completed in 175.01s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.11s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service zonemercy-vingt-deux-v2-1e5-v23
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: Downloaded to shared memory in 162.318s
Waiting for inference service zonemercy-vingt-deux-v2-1e5-v23 to be ready
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmp7dwtgy9_, device:0
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: Saving flywheel model at /dev/shm/model_cache
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: quantized model in 51.659s
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: Processed model zonemercy/Vingt-Deux-gfv1v2ep2 in 213.977s
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: creating bucket guanaco-mkml-models
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/config.json
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/special_tokens_map.json
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/tokenizer_config.json
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/tokenizer.json
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/flywheel_model.1.safetensors s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/flywheel_model.1.safetensors
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/zonemercy-vingt-deux-gfv-3432-v6/flywheel_model.0.safetensors
zonemercy-vingt-deux-gfv-3432-v6-mkmlizer: Loading 0: 0%| | 0/507 [00:00<?, ?it/s] Loading 0: 1%| | 4/507 [00:00<00:14, 35.49it/s] Loading 0: 2%|▏ | 8/507 [00:00<00:17, 28.19it/s] Loading 0: 2%|▏ | 12/507 [00:00<00:18, 27.40it/s] Loading 0: 3%|▎ | 15/507 [00:00<00:20, 23.44it/s] Loading 0: 4%|▎ | 19/507 [00:00<00:18, 26.76it/s] Loading 0: 5%|▍ | 23/507 [00:00<00:16, 29.34it/s] Loading 0: 5%|▌ | 27/507 [00:01<00:27, 17.73it/s] Loading 0: 6%|▌ | 31/507 [00:01<00:22, 20.85it/s] Loading 0: 7%|▋ | 34/507 [00:01<00:22, 21.35it/s] Loading 0: 7%|▋ | 37/507 [00:01<00:20, 22.97it/s] Loading 0: 8%|▊ | 40/507 [00:01<00:20, 23.25it/s] Loading 0: 8%|▊ | 43/507 [00:01<00:20, 23.03it/s] Loading 0: 9%|▉ | 46/507 [00:01<00:19, 23.06it/s] Loading 0: 10%|▉ | 50/507 [00:02<00:17, 26.64it/s] Loading 0: 10%|█ | 53/507 [00:02<00:23, 18.95it/s] Loading 0: 11%|█ | 56/507 [00:02<00:21, 20.58it/s] Loading 0: 12%|█▏ | 59/507 [00:02<00:25, 17.79it/s] Loading 0: 13%|█▎ | 64/507 [00:02<00:18, 23.40it/s] Loading 0: 13%|█▎ | 67/507 [00:02<00:18, 24.24it/s] Loading 0: 14%|█▍ | 70/507 [00:03<00:18, 24.15it/s] Loading 0: 14%|█▍ | 73/507 [00:03<00:17, 25.06it/s] Loading 0: 15%|█▌ | 77/507 [00:03<00:15, 28.08it/s] Loading 0: 16%|█▌ | 80/507 [00:03<00:21, 20.11it/s] Loading 0: 16%|█▋ | 83/507 [00:03<00:21, 19.47it/s] Loading 0: 17%|█▋ | 87/507 [00:03<00:18, 22.66it/s] Loading 0: 18%|█▊ | 92/507 [00:03<00:16, 25.18it/s] Loading 0: 19%|█▊ | 95/507 [00:04<00:18, 22.11it/s] Loading 0: 20%|█▉ | 99/507 [00:04<00:16, 24.92it/s] Loading 0: 20%|██ | 102/507 [00:04<00:16, 24.66it/s] Loading 0: 21%|██ | 105/507 [00:04<00:15, 25.87it/s] Loading 0: 21%|██▏ | 108/507 [00:04<00:22, 17.66it/s] Loading 0: 22%|██▏ | 112/507 [00:04<00:18, 20.94it/s] Loading 0: 23%|██▎ | 115/507 [00:05<00:18, 21.61it/s] Loading 0: 23%|██▎ | 118/507 [00:05<00:16, 23.17it/s] Loading 0: 24%|██▍ | 121/507 [00:05<00:16, 23.74it/s] Loading 0: 24%|██▍ | 124/507 [00:05<00:16, 23.35it/s] Loading 0: 25%|██▌ | 127/507 [00:05<00:15, 24.60it/s] Loading 0: 26%|██▌ | 131/507 [00:05<00:13, 27.95it/s] Loading 0: 26%|██▋ | 134/507 [00:05<00:18, 19.80it/s] Loading 0: 27%|██▋ | 137/507 [00:06<00:17, 21.39it/s] Loading 0: 28%|██▊ | 140/507 [00:06<00:20, 17.70it/s] Loading 0: 29%|██▊ | 145/507 [00:06<00:15, 22.92it/s] Loading 0: 29%|██▉ | 148/507 [00:06<00:15, 23.19it/s] Loading 0: 30%|██▉ | 151/507 [00:06<00:15, 23.08it/s] Loading 0: 30%|███ | 154/507 [00:06<00:14, 24.47it/s] Loading 0: 31%|███ | 158/507 [00:06<00:12, 27.21it/s] Loading 0: 32%|███▏ | 161/507 [00:07<00:17, 19.25it/s] Loading 0: 32%|███▏ | 164/507 [00:07<00:16, 20.72it/s] Loading 0: 33%|███▎ | 167/507 [00:07<00:19, 17.50it/s] Loading 0: 34%|███▍ | 172/507 [00:07<00:14, 23.06it/s] Loading 0: 35%|███▍ | 175/507 [00:07<00:15, 21.74it/s] Loading 0: 35%|███▌ | 178/507 [00:07<00:14, 22.18it/s] Loading 0: 36%|███▌ | 181/507 [00:08<00:14, 23.25it/s] Loading 0: 36%|███▋ | 185/507 [00:08<00:12, 26.58it/s] Loading 0: 37%|███▋ | 188/507 [00:08<00:17, 18.56it/s] Loading 0: 38%|███▊ | 191/507 [00:08<00:16, 19.65it/s] Loading 0: 38%|███▊ | 194/507 [00:08<00:18, 17.31it/s] Loading 0: 39%|███▉ | 199/507 [00:08<00:13, 22.72it/s] Loading 0: 40%|███▉ | 202/507 [00:09<00:13, 22.79it/s] Loading 0: 40%|████ | 205/507 [00:09<00:13, 22.79it/s] Loading 0: 41%|████ | 208/507 [00:09<00:12, 24.06it/s] Loading 0: 42%|████▏ | 212/507 [00:09<00:10, 26.90it/s] Loading 0: 42%|████▏ | 215/507 [00:09<00:14, 19.48it/s] Loading 0: 43%|████▎ | 218/507 [00:09<00:13, 20.97it/s] Loading 0: 44%|████▎ | 221/507 [00:09<00:16, 17.76it/s] Loading 0: 45%|████▍ | 226/507 [00:10<00:12, 23.36it/s] Loading 0: 45%|████▌ | 229/507 [00:10<00:11, 23.78it/s] Loading 0: 46%|████▌ | 232/507 [00:10<00:11, 23.00it/s] Loading 0: 46%|████▋ | 235/507 [00:10<00:11, 23.80it/s] Loading 0: 47%|████▋ | 239/507 [00:10<00:09, 26.85it/s] Loading 0: 48%|████▊ | 242/507 [00:10<00:14, 18.88it/s] Loading 0: 48%|████▊ | 245/507 [00:10<00:12, 20.47it/s] Loading 0: 49%|████▉ | 248/507 [00:11<00:14, 17.60it/s] Loading 0: 50%|████▉ | 253/507 [00:11<00:11, 23.04it/s] Loading 0: 50%|█████ | 256/507 [00:11<00:10, 23.54it/s] Loading 0: 51%|█████ | 259/507 [00:11<00:10, 23.66it/s] Loading 0: 52%|█████▏ | 262/507 [00:11<00:09, 24.98it/s] Loading 0: 52%|█████▏ | 266/507 [00:11<00:08, 27.92it/s] Loading 0: 53%|█████▎ | 269/507 [00:12<00:12, 19.23it/s] Loading 0: 54%|█████▎ | 272/507 [00:12<00:11, 20.67it/s] Loading 0: 54%|█████▍ | 275/507 [00:12<00:13, 17.64it/s] Loading 0: 55%|█████▌ | 280/507 [00:12<00:09, 22.90it/s] Loading 0: 56%|█████▌ | 283/507 [00:12<00:09, 23.55it/s] Loading 0: 56%|█████▋ | 286/507 [00:12<00:09, 23.26it/s] Loading 0: 57%|█████▋ | 289/507 [00:12<00:08, 24.42it/s] Loading 0: 58%|█████▊ | 293/507 [00:13<00:07, 27.18it/s] Loading 0: 58%|█████▊ | 296/507 [00:13<00:11, 19.14it/s] Loading 0: 59%|█████▉ | 299/507 [00:13<00:10, 20.75it/s] Loading 0: 60%|█████▉ | 302/507 [00:28<04:50, 1.42s/it] Loading 0: 60%|██████ | 305/507 [00:28<03:25, 1.02s/it] Loading 0: 61%|██████ | 308/507 [00:28<02:26, 1.36it/s] Loading 0: 61%|██████▏ | 311/507 [00:28<01:46, 1.85it/s] Loading 0: 62%|██████▏ | 316/507 [00:28<01:02, 3.05it/s] Loading 0: 63%|██████▎ | 320/507 [00:28<00:43, 4.34it/s] Loading 0: 64%|██████▍ | 324/507 [00:29<00:34, 5.26it/s] Loading 0: 65%|██████▍ | 328/507 [00:29<00:25, 7.14it/s] Loading 0: 65%|██████▌ | 331/507 [00:29<00:20, 8.62it/s] Loading 0: 66%|██████▋ | 336/507 [00:29<00:14, 11.79it/s] Loading 0: 67%|██████▋ | 339/507 [00:29<00:13, 12.91it/s] Loading 0: 68%|██████▊ | 343/507 [00:29<00:10, 16.00it/s] Loading 0: 68%|██████▊ | 347/507 [00:29<00:08, 19.35it/s] Loading 0: 69%|██████▉ | 351/507 [00:30<00:10, 15.57it/s] Loading 0: 70%|███████ | 355/507 [00:30<00:08, 18.39it/s] Loading 0: 71%|███████ | 358/507 [00:30<00:07, 19.54it/s] Loading 0: 71%|███████ | 361/507 [00:30<00:06, 21.35it/s] Loading 0: 72%|███████▏ | 364/507 [00:30<00:06, 22.24it/s] Loading 0: 72%|███████▏ | 367/507 [00:30<00:06, 22.66it/s] Loading 0: 73%|███████▎ | 370/507 [00:30<00:05, 24.10it/s] Loading 0: 74%|███████▍ | 374/507 [00:31<00:04, 26.66it/s] Loading 0: 74%|███████▍ | 377/507 [00:31<00:06, 18.91it/s] Loading 0: 75%|███████▍ | 380/507 [00:31<00:06, 19.91it/s] Loading 0: 76%|███████▌ | 383/507 [00:31<00:07, 17.07it/s] Loading 0: 77%|███████▋ | 388/507 [00:31<00:05, 22.07it/s] Loading 0: 77%|███████▋ | 391/507 [00:32<00:05, 22.60it/s] Loading 0: 78%|███████▊ | 394/507 [00:32<00:04, 23.06it/s] Loading 0: 79%|███████▊ | 398/507 [00:32<00:04, 23.27it/s] Loading 0: 79%|███████▉ | 402/507 [00:32<00:04, 26.20it/s] Loading 0: 80%|███████▉ | 405/507 [00:32<00:05, 17.78it/s] Loading 0: 81%|████████ | 409/507 [00:32<00:04, 20.86it/s] Loading 0: 81%|████████▏ | 412/507 [00:32<00:04, 21.26it/s] Loading 0: 82%|████████▏ | 415/507 [00:33<00:04, 22.84it/s] Loading 0: 82%|████████▏ | 418/507 [00:33<00:03, 23.45it/s] Loading 0: 83%|████████▎ | 421/507 [00:33<00:03, 23.27it/s] Loading 0: 84%|████████▎ | 424/507 [00:33<00:03, 24.46it/s] Loading 0: 84%|████████▍ | 428/507 [00:33<00:02, 27.90it/s] Loading 0: 85%|████████▌ | 431/507 [00:33<00:03, 19.52it/s] Loading 0: 86%|████████▌ | 434/507 [00:33<00:03, 20.75it/s] Loading 0: 86%|████████▌ | 437/507 [00:34<00:04, 17.49it/s] Loading 0: 87%|████████▋ | 442/507 [00:34<00:02, 22.57it/s] Loading 0: 88%|████████▊ | 445/507 [00:34<00:02, 22.89it/s] Loading 0: 88%|████████▊ | 448/507 [00:34<00:02, 23.16it/s] Loading 0: 89%|████████▉ | 451/507 [00:34<00:02, 23.92it/s] Loading 0: 90%|████████▉ | 455/507 [00:34<00:01, 27.34it/s] Loading 0: 90%|█████████ | 458/507 [00:35<00:02, 19.22it/s] Loading 0: 91%|█████████ | 461/507 [00:35<00:02, 20.80it/s] Loading 0: 92%|█████████▏| 464/507 [00:35<00:02, 17.56it/s] Loading 0: 93%|█████████▎| 469/507 [00:35<00:01, 23.01it/s] Loading 0: 93%|█████████▎| 472/507 [00:35<00:01, 23.80it/s] Loading 0: 94%|█████████▎| 475/507 [00:35<00:01, 23.59it/s] Loading 0: 94%|█████████▍| 478/507 [00:35<00:01, 24.57it/s] Loading 0: 95%|█████████▌| 482/507 [00:36<00:00, 27.15it/s] Loading 0: 96%|█████████▌| 485/507 [00:38<00:05, 4.19it/s] Loading 0: 96%|█████████▋| 488/507 [00:38<00:03, 5.31it/s] Loading 0: 97%|█████████▋| 491/507 [00:38<00:02, 6.89it/s] Loading 0: 97%|█████████▋| 494/507 [00:38<00:01, 8.75it/s] Loading 0: 98%|█████████▊| 499/507 [00:38<00:00, 12.34it/s] Loading 0: 99%|█████████▉| 502/507 [00:39<00:00, 13.43it/s] Loading 0: 100%|█████████▉| 506/507 [00:39<00:00, 16.58it/s]
Job zonemercy-vingt-deux-gfv-3432-v6-mkmlizer completed after 245.65s with status: succeeded
Stopping job with name zonemercy-vingt-deux-gfv-3432-v6-mkmlizer
Pipeline stage MKMLizer completed in 246.05s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.15s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service zonemercy-vingt-deux-gfv-3432-v6
Waiting for inference service zonemercy-vingt-deux-gfv-3432-v6 to be ready
Inference service zonemercy-vingt-deux-v2-1e5-v23 ready after 220.49615097045898s
Pipeline stage MKMLDeployer completed in 221.00s
run pipeline stage %s
Running pipeline stage StressChecker
Received healthy response to inference request in 2.8690884113311768s
Received healthy response to inference request in 2.53206467628479s
Received healthy response to inference request in 2.189180612564087s
Received healthy response to inference request in 2.5605297088623047s
Received healthy response to inference request in 2.4091384410858154s
5 requests
0 failed requests
5th percentile: 2.2331721782684326
10th percentile: 2.2771637439727783
20th percentile: 2.3651468753814697
30th percentile: 2.4337236881256104
40th percentile: 2.4828941822052
50th percentile: 2.53206467628479
60th percentile: 2.543450689315796
70th percentile: 2.554836702346802
80th percentile: 2.6222414493560793
90th percentile: 2.745664930343628
95th percentile: 2.807376670837402
99th percentile: 2.8567460632324218
mean time: 2.5120003700256346
Pipeline stage StressChecker completed in 14.29s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 4.03s
Shutdown handler de-registered
zonemercy-vingt-deux-v2-1e5_v23 status is now deployed due to DeploymentManager action
Inference service zonemercy-vingt-deux-gfv-3432-v6 ready after 220.4811007976532s
Pipeline stage MKMLDeployer completed in 220.88s
run pipeline stage %s
Running pipeline stage StressChecker
Received healthy response to inference request in 2.9672529697418213s
Received healthy response to inference request in 2.520599126815796s
Received healthy response to inference request in 2.6814486980438232s
Received healthy response to inference request in 2.3747010231018066s
Received healthy response to inference request in 2.609612226486206s
5 requests
0 failed requests
5th percentile: 2.4038806438446043
10th percentile: 2.4330602645874024
20th percentile: 2.4914195060729982
30th percentile: 2.538401746749878
40th percentile: 2.574006986618042
50th percentile: 2.609612226486206
60th percentile: 2.638346815109253
70th percentile: 2.6670814037322996
80th percentile: 2.738609552383423
90th percentile: 2.852931261062622
95th percentile: 2.9100921154022217
99th percentile: 2.9558207988739014
mean time: 2.6307228088378904
Pipeline stage StressChecker completed in 13.97s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 4.86s
Shutdown handler de-registered
zonemercy-vingt-deux-gfv_3432_v6 status is now deployed due to DeploymentManager action
Shutdown handler registered
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Skipping teardown as no inference service was successfully deployed
Pipeline stage MKMLProfilerDeleter completed in 0.16s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage MKMLProfilerTemplater completed in 0.12s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service zonemercy-vingt-deux-gfv-3432-v6-profiler
Waiting for inference service zonemercy-vingt-deux-gfv-3432-v6-profiler to be ready
Inference service zonemercy-vingt-deux-gfv-3432-v6-profiler ready after 222.08379697799683s
Pipeline stage MKMLProfilerDeployer completed in 222.50s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/src/inference_scripts tenant-chaiml-guanaco/zonemercy-vingt-deux428970a7473aa000359cc739467e4d5c-deplopn82n:/code/chaiverse_profiler_1727339586 --namespace tenant-chaiml-guanaco
kubectl exec -it zonemercy-vingt-deux428970a7473aa000359cc739467e4d5c-deplopn82n --namespace tenant-chaiml-guanaco -- sh -c 'cd /code/chaiverse_profiler_1727339586 && python profiles.py profile --best_of_n 8 --auto_batch 5 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 1024 --output_tokens 64 --summary /code/chaiverse_profiler_1727339586/summary.json'
kubectl exec -it zonemercy-vingt-deux428970a7473aa000359cc739467e4d5c-deplopn82n --namespace tenant-chaiml-guanaco -- bash -c 'cat /code/chaiverse_profiler_1727339586/summary.json'
Pipeline stage MKMLProfilerRunner completed in 1550.15s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Checking if service zonemercy-vingt-deux-gfv-3432-v6-profiler is running
Tearing down inference service zonemercy-vingt-deux-gfv-3432-v6-profiler
Service zonemercy-vingt-deux-gfv-3432-v6-profiler has been torndown
Pipeline stage MKMLProfilerDeleter completed in 2.33s
Shutdown handler de-registered
zonemercy-vingt-deux-gfv_3432_v6 status is now inactive due to auto deactivation removed underperforming models
Ignoring service blend-rofur-2024-10-03 already deployed
Ignoring service blend-rofur-2024-10-03 already deployed
Waiting for inference service blend-rofur-2024-10-03 to be ready
Waiting for inference service blend-rofur-2024-10-03 to be ready
Waiting for inference service blend-rofur-2024-10-03 to be ready
admin requested tearing down of blend_rofur_2024-10-03
Waiting for inference service blend-rofur-2024-10-03 to be ready
Waiting for inference service blend-rofur-2024-10-03 to be ready
Shutdown handler not registered because Python interpreter is not running in the main thread
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of blend_rofur_2024-10-03
run pipeline %s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline stage %s
run pipeline stage %s
run pipeline %s
Running pipeline stage ProductionBlendMKMLTemplater
Running pipeline stage MKMLDeleter
run pipeline stage %s
Pipeline stage %s skipped, reason=%s
Checking if service zonemercy-vingt-deux-gfv-3432-v6 is running
Running pipeline stage ProductionBlendMKMLTemplater
Pipeline stage ProductionBlendMKMLTemplater completed in 35.80s
Tearing down inference service zonemercy-vingt-deux-gfv-3432-v6
Pipeline stage %s skipped, reason=%s
run pipeline stage %s
Service zonemercy-vingt-deux-gfv-3432-v6 has been torndown
Pipeline stage ProductionBlendMKMLTemplater completed in 36.34s
Running pipeline stage MKMLDeployer
Pipeline stage MKMLDeleter completed in 127.38s
run pipeline stage %s
run pipeline stage %s
Creating inference service blend-rofur-2024-10-03
Running pipeline stage MKMLDeployer
Running pipeline stage MKMLModelDeleter
Ignoring service blend-rofur-2024-10-03 already deployed
Creating inference service blend-rofur-2024-10-03
Cleaning model data from S3
zonemercy-vingt-deux-gfv_3432_v6 status is now torndown due to DeploymentManager action
Tearing down inference service blend-rofur-2024-10-03
admin requested tearing down of zonemercy-vingt-deux-gfv_3432_v6
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
clean up pipeline due to error=DeploymentError('Timeout to start the InferenceService blend-rofur-2024-10-03. The InferenceService is as following: {\'apiVersion\': \'serving.kserve.io/v1beta1\', \'kind\': \'InferenceService\', \'metadata\': {\'annotations\': {\'autoscaling.knative.dev/class\': \'hpa.autoscaling.knative.dev\', \'autoscaling.knative.dev/container-concurrency-target-percentage\': \'70\', \'autoscaling.knative.dev/initial-scale\': \'1\', \'autoscaling.knative.dev/max-scale-down-rate\': \'1.1\', \'autoscaling.knative.dev/max-scale-up-rate\': \'2\', \'autoscaling.knative.dev/metric\': \'mean_pod_latency_ms_v2\', \'autoscaling.knative.dev/panic-threshold-percentage\': \'650\', \'autoscaling.knative.dev/panic-window-percentage\': \'35\', \'autoscaling.knative.dev/scale-down-delay\': \'30s\', \'autoscaling.knative.dev/scale-to-zero-grace-period\': \'10m\', \'autoscaling.knative.dev/stable-window\': \'180s\', \'autoscaling.knative.dev/target\': \'3700\', \'autoscaling.knative.dev/target-burst-capacity\': \'-1\', \'autoscaling.knative.dev/tick-interval\': \'15s\', \'features.knative.dev/http-full-duplex\': \'Enabled\', \'networking.knative.dev/ingress-class\': \'istio.ingress.networking.knative.dev\'}, \'creationTimestamp\': \'2024-10-04T08:24:22Z\', \'finalizers\': [\'inferenceservice.finalizers\'], \'generation\': 1, \'labels\': {\'knative.coreweave.cloud/ingress\': \'istio.ingress.networking.knative.dev\', \'prometheus.k.chaiverse.com\': \'true\', \'qos.coreweave.cloud/latency\': \'low\'}, \'managedFields\': [{\'apiVersion\': \'serving.kserve.io/v1beta1\', \'fieldsType\': \'FieldsV1\', \'fieldsV1\': {\'f:metadata\': {\'f:annotations\': {\'.\': {}, \'f:autoscaling.knative.dev/class\': {}, \'f:autoscaling.knative.dev/container-concurrency-target-percentage\': {}, \'f:autoscaling.knative.dev/initial-scale\': {}, \'f:autoscaling.knative.dev/max-scale-down-rate\': {}, \'f:autoscaling.knative.dev/max-scale-up-rate\': {}, \'f:autoscaling.knative.dev/metric\': {}, \'f:autoscaling.knative.dev/panic-threshold-percentage\': {}, \'f:autoscaling.knative.dev/panic-window-percentage\': {}, \'f:autoscaling.knative.dev/scale-down-delay\': {}, \'f:autoscaling.knative.dev/scale-to-zero-grace-period\': {}, \'f:autoscaling.knative.dev/stable-window\': {}, \'f:autoscaling.knative.dev/target\': {}, \'f:autoscaling.knative.dev/target-burst-capacity\': {}, \'f:autoscaling.knative.dev/tick-interval\': {}, \'f:features.knative.dev/http-full-duplex\': {}, \'f:networking.knative.dev/ingress-class\': {}}, \'f:labels\': {\'.\': {}, \'f:knative.coreweave.cloud/ingress\': {}, \'f:prometheus.k.chaiverse.com\': {}, \'f:qos.coreweave.cloud/latency\': {}}}, \'f:spec\': {\'.\': {}, \'f:predictor\': {\'.\': {}, \'f:containerConcurrency\': {}, \'f:containers\': {}, \'f:imagePullSecrets\': {}, \'f:timeout\': {}, \'f:volumes\': {}}}}, \'manager\': \'OpenAPI-Generator\', \'operation\': \'Update\', \'time\': \'2024-10-04T08:24:22Z\'}, {\'apiVersion\': \'serving.kserve.io/v1beta1\', \'fieldsType\': \'FieldsV1\', \'fieldsV1\': {\'f:metadata\': {\'f:finalizers\': {\'.\': {}, \'v:"inferenceservice.finalizers"\': {}}}}, \'manager\': \'manager\', \'operation\': \'Update\', \'time\': \'2024-10-04T08:24:22Z\'}], \'name\': \'blend-rofur-2024-10-03\', \'namespace\': \'tenant-chaiml-guanaco\', \'resourceVersion\': \'110876429\', \'uid\': \'34297235-bdd6-4da6-8789-b3463b564126\'}, \'spec\': {\'predictor\': {\'containerConcurrency\': 0, \'containers\': [{\'args\': [\'/code/mkml_inference_service/router.py\'], \'command\': [\'python3\'], \'env\': [{\'name\': \'URL_ROUTE\', \'value\': \'GPT-J-6B-lit-v2\'}, {\'name\': \'NUM_PARTITIONS\', \'value\': \'4\'}, {\'name\': \'PORT_MAPPING\', \'value\': \'{"zonemercy-virgo-edit-v1-1e5_v13": 8081, "chaiml-lexical-nemo-v4-1k1e5_v3": 8082, "zonemercy-lexical-nemov8_5966_v9": 8083, "sao10k-mn-12b-lyra-v4a1_v9": 8084}\'}, {\'name\': \'NVIDIA_VISIBLE_DEVICES\', \'value\': \'none\'}], \'image\': \'gcr.io/chai-959f8/chai-guanaco/mkml:production_blend_v2\', \'imagePullPolicy\': \'IfNotPresent\', \'name\': \'kserve-container\', \'ports\': [{\'containerPort\': 8080, \'protocol\': \'TCP\'}], \'readinessProbe\': {\'failureThreshold\': 5, \'httpGet\': {\'path\': \'/health\', \'port\': 8080}, \'periodSeconds\': 5, \'timeoutSeconds\': 10}, \'resources\': {\'limits\': {\'cpu\': \'1\', \'memory\': \'1Gi\', \'nvidia.com/gpu\': \'0\'}, \'requests\': {\'cpu\': \'1\', \'memory\': \'1Gi\', \'nvidia.com/gpu\': \'0\'}}, \'volumeMounts\': [{\'mountPath\': \'/dev/shm\', \'name\': \'shared-memory-cache\'}]}, {\'env\': [{\'name\': \'MAX_TOKEN_INPUT\', \'value\': \'1024\'}, {\'name\': \'BEST_OF\', \'value\': \'8\'}, {\'name\': \'TEMPERATURE\', \'value\': \'0.9\'}, {\'name\': \'PRESENCE_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'FREQUENCY_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'TOP_P\', \'value\': \'1.0\'}, {\'name\': \'MIN_P\', \'value\': \'0.05\'}, {\'name\': \'TOP_K\', \'value\': \'80\'}, {\'name\': \'STOPPING_WORDS\', \'value\': \'["\\\\\\\\\\\\\\\\n", "</s>", "###", "Bot:", "User:", "You:", "<|im_end|>"]\'}, {\'name\': \'MAX_TOKENS\', \'value\': \'64\'}, {\'name\': \'MAX_BATCH_SIZE\', \'value\': \'128\'}, {\'name\': \'URL_ROUTE\', \'value\': \'GPT-J-6B-lit-v2\'}, {\'name\': \'OBJ_ACCESS_KEY_ID\', \'value\': \'LETMTTRMLFFAMTBK\'}, {\'name\': \'OBJ_SECRET_ACCESS_KEY\', \'value\': \'\'}, {\'name\': \'OBJ_ENDPOINT\', \'value\': \'https://accel-object.ord1.coreweave.com\'}, {\'name\': \'TENSORIZER_URI\', \'value\': \'s3://guanaco-mkml-models/zonemercy-virgo-edit-v1-1e5-v13\'}, {\'name\': \'RESERVE_MEMORY\', \'value\': \'2048\'}, {\'name\': \'DOWNLOAD_TO_LOCAL\', \'value\': \'/dev/shm/zonemercy-virgo-edit-v1-1e5_v13_model_cache\'}, {\'name\': \'NUM_GPUS\', \'value\': \'1\'}, {\'name\': \'MK1_MKML_LICENSE_KEY\', \'valueFrom\': {\'secretKeyRef\': {\'key\': \'key\', \'name\': \'mkml-license-key\'}}}, {\'name\': \'SERVER_PORT\', \'value\': \'8081\'}], \'image\': \'gcr.io/chai-959f8/chai-guanaco/mkml:cks_kube_config_v3\', \'imagePullPolicy\': \'IfNotPresent\', \'name\': \'zonemercy-virgo-edit-v1-1e5-v13\', \'readinessProbe\': {\'failureThreshold\': 5, \'httpGet\': {\'path\': \'/metrics\', \'port\': 8081}, \'periodSeconds\': 5, \'timeoutSeconds\': 10}, \'resources\': {\'limits\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}, \'requests\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}}, \'volumeMounts\': [{\'mountPath\': \'/dev/shm\', \'name\': \'shared-memory-cache\'}]}, {\'env\': [{\'name\': \'MAX_TOKEN_INPUT\', \'value\': \'1024\'}, {\'name\': \'BEST_OF\', \'value\': \'8\'}, {\'name\': \'TEMPERATURE\', \'value\': \'0.9\'}, {\'name\': \'PRESENCE_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'FREQUENCY_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'TOP_P\', \'value\': \'1.0\'}, {\'name\': \'MIN_P\', \'value\': \'0.05\'}, {\'name\': \'TOP_K\', \'value\': \'80\'}, {\'name\': \'STOPPING_WORDS\', \'value\': \'["\\\\\\\\\\\\\\\\n", "</s>", "###", "Bot:", "User:", "You:", "<|im_end|>"]\'}, {\'name\': \'MAX_TOKENS\', \'value\': \'64\'}, {\'name\': \'MAX_BATCH_SIZE\', \'value\': \'128\'}, {\'name\': \'URL_ROUTE\', \'value\': \'GPT-J-6B-lit-v2\'}, {\'name\': \'OBJ_ACCESS_KEY_ID\', \'value\': \'LETMTTRMLFFAMTBK\'}, {\'name\': \'OBJ_SECRET_ACCESS_KEY\', \'value\': \'\'}, {\'name\': \'OBJ_ENDPOINT\', \'value\': \'https://accel-object.ord1.coreweave.com\'}, {\'name\': \'TENSORIZER_URI\', \'value\': \'s3://guanaco-mkml-models/chaiml-lexical-nemo-v4-1k1e5-v3\'}, {\'name\': \'RESERVE_MEMORY\', \'value\': \'2048\'}, {\'name\': \'DOWNLOAD_TO_LOCAL\', \'value\': \'/dev/shm/chaiml-lexical-nemo-v4-1k1e5_v3_model_cache\'}, {\'name\': \'NUM_GPUS\', \'value\': \'1\'}, {\'name\': \'MK1_MKML_LICENSE_KEY\', \'valueFrom\': {\'secretKeyRef\': {\'key\': \'key\', \'name\': \'mkml-license-key\'}}}, {\'name\': \'SERVER_PORT\', \'value\': \'8082\'}], \'image\': \'gcr.io/chai-959f8/chai-guanaco/mkml:cks_kube_config_v3\', \'imagePullPolicy\': \'IfNotPresent\', \'name\': \'chaiml-lexical-nemo-v4-1k1e5-v3\', \'readinessProbe\': {\'failureThreshold\': 5, \'httpGet\': {\'path\': \'/metrics\', \'port\': 8082}, \'periodSeconds\': 5, \'timeoutSeconds\': 10}, \'resources\': {\'limits\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}, \'requests\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}}, \'volumeMounts\': [{\'mountPath\': \'/dev/shm\', \'name\': \'shared-memory-cache\'}]}, {\'env\': [{\'name\': \'MAX_TOKEN_INPUT\', \'value\': \'1024\'}, {\'name\': \'BEST_OF\', \'value\': \'8\'}, {\'name\': \'TEMPERATURE\', \'value\': \'0.9\'}, {\'name\': \'PRESENCE_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'FREQUENCY_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'TOP_P\', \'value\': \'1.0\'}, {\'name\': \'MIN_P\', \'value\': \'0.05\'}, {\'name\': \'TOP_K\', \'value\': \'80\'}, {\'name\': \'STOPPING_WORDS\', \'value\': \'["\\\\\\\\\\\\\\\\n", "</s>", "###", "Bot:", "User:", "You:", "<|im_end|>"]\'}, {\'name\': \'MAX_TOKENS\', \'value\': \'64\'}, {\'name\': \'MAX_BATCH_SIZE\', \'value\': \'128\'}, {\'name\': \'URL_ROUTE\', \'value\': \'GPT-J-6B-lit-v2\'}, {\'name\': \'OBJ_ACCESS_KEY_ID\', \'value\': \'LETMTTRMLFFAMTBK\'}, {\'name\': \'OBJ_SECRET_ACCESS_KEY\', \'value\': \'\'}, {\'name\': \'OBJ_ENDPOINT\', \'value\': \'https://accel-object.ord1.coreweave.com\'}, {\'name\': \'TENSORIZER_URI\', \'value\': \'s3://guanaco-mkml-models/zonemercy-lexical-nemov8-5966-v9\'}, {\'name\': \'RESERVE_MEMORY\', \'value\': \'2048\'}, {\'name\': \'DOWNLOAD_TO_LOCAL\', \'value\': \'/dev/shm/zonemercy-lexical-nemov8_5966_v9_model_cache\'}, {\'name\': \'NUM_GPUS\', \'value\': \'1\'}, {\'name\': \'MK1_MKML_LICENSE_KEY\', \'valueFrom\': {\'secretKeyRef\': {\'key\': \'key\', \'name\': \'mkml-license-key\'}}}, {\'name\': \'SERVER_PORT\', \'value\': \'8083\'}], \'image\': \'gcr.io/chai-959f8/chai-guanaco/mkml:cks_kube_config_v3\', \'imagePullPolicy\': \'IfNotPresent\', \'name\': \'zonemercy-lexical-nemov8-5966-v9\', \'readinessProbe\': {\'failureThreshold\': 5, \'httpGet\': {\'path\': \'/metrics\', \'port\': 8083}, \'periodSeconds\': 5, \'timeoutSeconds\': 10}, \'resources\': {\'limits\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}, \'requests\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}}, \'volumeMounts\': [{\'mountPath\': \'/dev/shm\', \'name\': \'shared-memory-cache\'}]}, {\'env\': [{\'name\': \'MAX_TOKEN_INPUT\', \'value\': \'1024\'}, {\'name\': \'BEST_OF\', \'value\': \'8\'}, {\'name\': \'TEMPERATURE\', \'value\': \'0.75\'}, {\'name\': \'PRESENCE_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'FREQUENCY_PENALTY\', \'value\': \'0.0\'}, {\'name\': \'TOP_P\', \'value\': \'1.0\'}, {\'name\': \'MIN_P\', \'value\': \'0.1\'}, {\'name\': \'TOP_K\', \'value\': \'40\'}, {\'name\': \'STOPPING_WORDS\', \'value\': \'["\\\\\\\\\\\\\\\\n", "\\\\\\\\\\\\\\\\n\\\\\\\\\\\\\\\\n", "\\\\\\\\\\\\\\\\nYou:", "[/INST]", "<|im_end|>", "</s>"]\'}, {\'name\': \'MAX_TOKENS\', \'value\': \'64\'}, {\'name\': \'MAX_BATCH_SIZE\', \'value\': \'128\'}, {\'name\': \'URL_ROUTE\', \'value\': \'GPT-J-6B-lit-v2\'}, {\'name\': \'OBJ_ACCESS_KEY_ID\', \'value\': \'LETMTTRMLFFAMTBK\'}, {\'name\': \'OBJ_SECRET_ACCESS_KEY\', \'value\': \'\'}, {\'name\': \'OBJ_ENDPOINT\', \'value\': \'https://accel-object.ord1.coreweave.com\'}, {\'name\': \'TENSORIZER_URI\', \'value\': \'s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v9\'}, {\'name\': \'RESERVE_MEMORY\', \'value\': \'2048\'}, {\'name\': \'DOWNLOAD_TO_LOCAL\', \'value\': \'/dev/shm/sao10k-mn-12b-lyra-v4a1_v9_model_cache\'}, {\'name\': \'NUM_GPUS\', \'value\': \'1\'}, {\'name\': \'MK1_MKML_LICENSE_KEY\', \'valueFrom\': {\'secretKeyRef\': {\'key\': \'key\', \'name\': \'mkml-license-key\'}}}, {\'name\': \'SERVER_PORT\', \'value\': \'8084\'}], \'image\': \'gcr.io/chai-959f8/chai-guanaco/mkml:cks_kube_config_v3\', \'imagePullPolicy\': \'IfNotPresent\', \'name\': \'sao10k-mn-12b-lyra-v4a1-v9\', \'readinessProbe\': {\'failureThreshold\': 5, \'httpGet\': {\'path\': \'/metrics\', \'port\': 8084}, \'periodSeconds\': 5, \'timeoutSeconds\': 10}, \'resources\': {\'limits\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}, \'requests\': {\'cpu\': \'2\', \'memory\': \'14Gi\', \'nvidia.com/gpu\': \'1\'}}, \'volumeMounts\': [{\'mountPath\': \'/dev/shm\', \'name\': \'shared-memory-cache\'}]}], \'imagePullSecrets\': [{\'name\': \'docker-creds\'}], \'timeout\': 60, \'volumes\': [{\'emptyDir\': {\'medium\': \'Memory\'}, \'name\': \'shared-memory-cache\'}]}}}')
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
Tearing down inference service blend-rofur-2024-10-03
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v1-1e5_v22
%s, retrying in %s seconds...
%s, retrying in %s seconds...
%s, retrying in %s seconds...
%s, retrying in %s seconds...
Shutdown handler de-registered
%s, retrying in %s seconds...
%s, retrying in %s seconds...
clean up pipeline due to error=DeploymentError('Exception when calling CustomObjectsApi->get_namespaced_custom_object: (404)\nReason: Not Found\nHTTP response headers: HTTPHeaderDict({\'Audit-Id\': \'f7e3f7e4-ca28-4b23-ae56-bc9a1635d19f, a1548554-b16b-49bb-af93-14479cded20f\', \'Cache-Control\': \'no-cache, private, no-cache, private\', \'Content-Length\': \'284\', \'Content-Type\': \'application/json\', \'Date\': \'Fri, 04 Oct 2024 08:35:20 GMT\', \'X-Kubernetes-Pf-Flowschema-Uid\': \'514c121f-0f8a-452c-aa56-437270a02244\', \'X-Kubernetes-Pf-Prioritylevel-Uid\': \'48ad322a-4034-4c03-9ea4-7745e1e2c31a\'})\nHTTP response body: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"inferenceservices.serving.kserve.io \\"blend-rofur-2024-10-03\\" not found","reason":"NotFound","details":{"name":"blend-rofur-2024-10-03","group":"serving.kserve.io","kind":"inferenceservices"},"code":404}\n\n\n')
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
Creating inference service blend-rofur-2024-10-03
zonemercy-vingt-deux-gfv_3432_v6 status is now torndown due to DeploymentManager action
admin requested tearing down of zonemercy-vingt-deux-gfv_3432_v6
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
admin requested tearing down of zonemercy-vingt-deux-v1-1e5_v22
run pipeline stage %s
Shutdown handler not registered because Python interpreter is not running in the main thread
Running pipeline stage MKMLDeleter
admin requested tearing down of zonemercy-vingt-deux-v1-1e5_v23
run pipeline %s
Pipeline stage %s skipped, reason=%s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v2-1e5_v23
run pipeline stage %s
Pipeline stage MKMLDeleter completed in 0.64s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v2-1e5_v24
Running pipeline stage MKMLDeleter
run pipeline stage %s
run pipeline stage %s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of blend_rofur_2024-10-03
admin requested tearing down of zonemercy-vingt-deux-v3-1e5v0_v5
Running pipeline stage MKMLModelDeleter
Checking if service zonemercy-vingt-deux-v1-1e5-v22 is running
Running pipeline stage MKMLDeleter
run pipeline stage %s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v3-1e5v0_v6
run pipeline %s
Pipeline stage %s skipped, reason=%s
Running pipeline stage MKMLDeleter
Checking if service zonemercy-vingt-deux-v1-1e5-v23 is running
run pipeline stage %s
run pipeline %s
Tearing down inference service zonemercy-vingt-deux-v1-1e5-v22
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v3-1e5v2_v6
run pipeline stage %s
Pipeline stage MKMLModelDeleter completed in 20.20s
Checking if service zonemercy-vingt-deux-v2-1e5-v23 is running
Running pipeline stage MKMLDeleter
run pipeline stage %s
Tearing down inference service zonemercy-vingt-deux-v1-1e5-v23
Service zonemercy-vingt-deux-v1-1e5-v22 has been torndown
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of zonemercy-vingt-deux-v3-1e5v2_v7
Running pipeline stage ProductionBlendMKMLTemplater
Shutdown handler de-registered
Checking if service zonemercy-vingt-deux-v2-1e5-v24 is running
Tearing down inference service zonemercy-vingt-deux-v2-1e5-v23
Running pipeline stage MKMLDeleter
Service zonemercy-vingt-deux-v1-1e5-v23 has been torndown
Pipeline stage MKMLDeleter completed in 43.01s
run pipeline stage %s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of blend_rofur_2024-10-03
Pipeline stage %s skipped, reason=%s
Checking if service zonemercy-vingt-deux-v2-1e5-v24 is running
Tearing down inference service zonemercy-vingt-deux-v2-1e5-v23
Running pipeline stage MKMLDeleter
Service zonemercy-vingt-deux-v1-1e5-v23 has been torndown
Pipeline stage MKMLDeleter completed in 43.01s
run pipeline stage %s
run pipeline %s
Shutdown handler not registered because Python interpreter is not running in the main thread
admin requested tearing down of blend_rofur_2024-10-03
zonemercy-vingt-deux-gfv_3432_v6 status is now torndown due to DeploymentManager action
Service zonemercy-vingt-deux-v2-1e5-v23 has been torndown
Tearing down inference service zonemercy-vingt-deux-v2-1e5-v24