submission_id: nousresearch-meta-llama_4939_v55
developer_uid: end_to_end_test
best_of: 4
display_name: nousresearch-meta-llama_4939_v55
family_friendly_score: 0.0
formatter: {'memory_template': "{bot_name}'s Persona: {memory}\n####\n", 'prompt_template': '{prompt}\n<START>\n', 'bot_template': '{bot_name}: {message}\n', 'user_template': '{user_name}: {message}\n', 'response_template': '{bot_name}:', 'truncate_by_message': False}
generation_params: {'temperature': 1.0, 'top_p': 0.99, 'min_p': 0.1, 'top_k': 40, 'presence_penalty': 0.0, 'frequency_penalty': 0.0, 'stopping_words': ['\n'], 'max_input_tokens': 512, 'best_of': 4, 'max_output_tokens': 64}
ineligible_reason: model is only for e2e test
is_internal_developer: True
language_model: NousResearch/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 512
max_output_tokens: 64
model_architecture: LlamaForCausalLM
model_group: NousResearch/Meta-Llama-
model_name: nousresearch-meta-llama_4939_v55
model_num_parameters: 8030261248.0
model_repo: NousResearch/Meta-Llama-3.1-8B-Instruct
model_size: 8B
num_battles: 0
num_wins: 0
ranking_group: single
status: torndown
submission_type: basic
timestamp: 2024-09-01T03:32:19+00:00
us_pacific_date: 2024-08-31
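The formatter and generation_params fields above fully describe how a conversation is rendered into a prompt and which sampling settings are used at inference time. The sketch below is illustrative only (the platform's actual serving code is not part of this record): the `build_prompt` helper and the commented-out `llm.generate(...)` call are hypothetical, while the template strings and sampling values are copied verbatim from the fields above.

```python
# Minimal sketch of applying the submission's formatter and generation_params.
# Only the template strings and parameter values come from the record above;
# build_prompt and llm.generate are hypothetical stand-ins.

formatter = {
    "memory_template": "{bot_name}'s Persona: {memory}\n####\n",
    "prompt_template": "{prompt}\n<START>\n",
    "bot_template": "{bot_name}: {message}\n",
    "user_template": "{user_name}: {message}\n",
    "response_template": "{bot_name}:",
}

generation_params = {
    "temperature": 1.0,
    "top_p": 0.99,
    "min_p": 0.1,
    "top_k": 40,
    "presence_penalty": 0.0,
    "frequency_penalty": 0.0,
    "stop": ["\n"],        # 'stopping_words' above
    "max_tokens": 64,      # 'max_output_tokens' above
    "best_of": 4,
}

def build_prompt(bot_name, memory, scenario, turns):
    """Assemble persona, scenario, and chat turns into a single prompt string."""
    text = formatter["memory_template"].format(bot_name=bot_name, memory=memory)
    text += formatter["prompt_template"].format(prompt=scenario)
    for speaker, message in turns:
        if speaker == bot_name:
            text += formatter["bot_template"].format(bot_name=speaker, message=message)
        else:
            text += formatter["user_template"].format(user_name=speaker, message=message)
    return text + formatter["response_template"].format(bot_name=bot_name)

prompt = build_prompt("Eliza", "A friendly chatbot.", "Casual chat.",
                      [("Anon", "Hi there!")])
# response = llm.generate(prompt, **generation_params)  # hypothetical call
```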
Resubmit model
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name nousresearch-meta-llama-4939-v55-mkmlizer
Waiting for job on nousresearch-meta-llama-4939-v55-mkmlizer to finish
nousresearch-meta-llama-4939-v55-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
nousresearch-meta-llama-4939-v55-mkmlizer: ║ _____ __ __ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ /___/ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ Version: 0.10.1 ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ https://mk1.ai ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ The license key for the current software has been verified as ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ belonging to: ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ Chai Research Corp. ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
nousresearch-meta-llama-4939-v55-mkmlizer: ║ ║
nousresearch-meta-llama-4939-v55-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
nousresearch-meta-llama-4939-v55-mkmlizer: Downloaded to shared memory in 34.897s
nousresearch-meta-llama-4939-v55-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmpicb0gc2f, device:0
nousresearch-meta-llama-4939-v55-mkmlizer: Saving flywheel model at /dev/shm/model_cache
nousresearch-meta-llama-4939-v55-mkmlizer: quantized model in 26.028s
nousresearch-meta-llama-4939-v55-mkmlizer: Processed model NousResearch/Meta-Llama-3.1-8B-Instruct in 60.926s
nousresearch-meta-llama-4939-v55-mkmlizer: creating bucket guanaco-mkml-models
nousresearch-meta-llama-4939-v55-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
nousresearch-meta-llama-4939-v55-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55
nousresearch-meta-llama-4939-v55-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55/config.json
nousresearch-meta-llama-4939-v55-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55/special_tokens_map.json
nousresearch-meta-llama-4939-v55-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55/tokenizer_config.json
nousresearch-meta-llama-4939-v55-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55/tokenizer.json
nousresearch-meta-llama-4939-v55-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55/flywheel_model.0.safetensors
nousresearch-meta-llama-4939-v55-mkmlizer: Loading 0: 0%| | 0/291 [00:00<?, ?it/s]
nousresearch-meta-llama-4939-v55-mkmlizer: Loading 0: 100%|█████████▉| 290/291 [00:11<00:00, 3.25it/s]
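The cp lines above push the quantized cache from /dev/shm/model_cache into the s3://guanaco-mkml-models/nousresearch-meta-llama-4939-v55 prefix. A rough boto3 equivalent of that upload step is sketched below; the mkmlizer's own upload tooling is not shown in the log, so this is an assumption-laden illustration, not its implementation.

```python
# Illustrative boto3 sketch of the upload step above (not the mkmlizer's code).
import os
import boto3

s3 = boto3.client("s3")
bucket = "guanaco-mkml-models"
prefix = "nousresearch-meta-llama-4939-v55"
cache_dir = "/dev/shm/model_cache"

for name in ["config.json", "special_tokens_map.json", "tokenizer_config.json",
             "tokenizer.json", "flywheel_model.0.safetensors"]:
    s3.upload_file(os.path.join(cache_dir, name), bucket, f"{prefix}/{name}")
```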
Job nousresearch-meta-llama-4939-v55-mkmlizer completed after 89.33s with status: succeeded
Stopping job with name nousresearch-meta-llama-4939-v55-mkmlizer
Pipeline stage MKMLizer completed in 90.45s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.37s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service nousresearch-meta-llama-4939-v55
Waiting for inference service nousresearch-meta-llama-4939-v55 to be ready
Inference service nousresearch-meta-llama-4939-v55 ready after 183.6338770389557s
Pipeline stage MKMLDeployer completed in 184.52s
run pipeline stage %s
Running pipeline stage StressChecker
Received healthy response to inference request in 1.7927699089050293s
Received healthy response to inference request in 1.1765601634979248s
Received healthy response to inference request in 1.724660873413086s
Received healthy response to inference request in 0.9508090019226074s
Received healthy response to inference request in 1.440197229385376s
5 requests
0 failed requests
5th percentile: 0.9959592342376709
10th percentile: 1.0411094665527343
20th percentile: 1.1314099311828614
30th percentile: 1.229287576675415
40th percentile: 1.3347424030303956
50th percentile: 1.440197229385376
60th percentile: 1.55398268699646
70th percentile: 1.6677681446075439
80th percentile: 1.7382826805114746
90th percentile: 1.765526294708252
95th percentile: 1.7791481018066406
99th percentile: 1.7900455474853516
mean time: 1.4169994354248048
Pipeline stage StressChecker completed in 9.34s
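The StressChecker summary above is derivable from the five healthy response times it lists. The snippet below reproduces the reported percentiles and mean using NumPy's default linear interpolation between order statistics; it is a sketch of the arithmetic, not the checker's actual implementation.

```python
# Reproduces the StressChecker percentiles and mean from the five latencies above.
import numpy as np

latencies = [1.7927699089050293, 1.1765601634979248, 1.724660873413086,
             0.9508090019226074, 1.440197229385376]

print(f"{len(latencies)} requests")
for p in (5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99):
    print(f"{p}th percentile: {np.percentile(latencies, p)}")
print(f"mean time: {np.mean(latencies)}")
```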
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
starting trigger_guanaco_pipeline args=%s
nousresearch-meta-llama_4939_v55 status is now torndown due to DeploymentManager action
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage %s skipped, reason=%s
Pipeline stage MKMLProfilerTemplater completed in 0.19s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service nousresearch-meta-llama-4939-v55-profiler
Ignoring service nousresearch-meta-llama-4939-v55-profiler already deployed
Waiting for inference service nousresearch-meta-llama-4939-v55-profiler to be ready
Inference service nousresearch-meta-llama-4939-v55-profiler ready after 10.250609874725342s
Pipeline stage MKMLProfilerDeployer completed in 11.25s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/scripts tenant-chaiml-guanaco/nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77:/code/chaiverse_profiler_1725162936
kubectl exec -it nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77 -- sh -c 'cd /code/chaiverse_profiler_1725162936 && chmod +x profiles.py && python profiles.py profile --best_of_n 4 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 512 --output_tokens 64 --summary /code/chaiverse_profiler_1725162936/summary.json'
%s, retrying in %s seconds...
kubectl cp /code/guanaco/guanaco_inference_services/scripts tenant-chaiml-guanaco/nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77:/code/chaiverse_profiler_1725163386
kubectl exec -it nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77 -- sh -c 'cd /code/chaiverse_profiler_1725163386 && chmod +x profiles.py && python profiles.py profile --best_of_n 4 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 512 --output_tokens 64 --summary /code/chaiverse_profiler_1725163386/summary.json'
%s, retrying in %s seconds...
kubectl cp /code/guanaco/guanaco_inference_services/scripts tenant-chaiml-guanaco/nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77:/code/chaiverse_profiler_1725163833
kubectl exec -it nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77 -- sh -c 'cd /code/chaiverse_profiler_1725163833 && chmod +x profiles.py && python profiles.py profile --best_of_n 4 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 512 --output_tokens 64 --summary /code/chaiverse_profiler_1725163833/summary.json'
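The profiler command above sweeps batch sizes 1 and 5 through 195 in steps of 5, and the "%s, retrying in %s seconds..." lines show the runner retrying the kubectl exec when an attempt fails. The sketch below shows one way to build that --batches argument and wrap the command in a retry loop; the retry count, back-off, and working directory are assumptions, not values taken from the log.

```python
# Illustrative only: builds the --batches list seen above and retries the
# kubectl exec command on failure, mirroring the "retrying in %s seconds..."
# log lines. Retry budget and sleep interval are assumptions.
import subprocess
import time

batches = ",".join(str(b) for b in [1] + list(range(5, 200, 5)))  # "1,5,10,...,195"
pod = "nousresearch-meta-llcd0deafa40dcecf506285479b0c9b574-deplo84g77"
workdir = "/code/chaiverse_profiler_1725162936"
cmd = [
    "kubectl", "exec", "-it", pod, "--", "sh", "-c",
    f"cd {workdir} && chmod +x profiles.py && python profiles.py profile "
    f"--best_of_n 4 --batches {batches} --samples 200 "
    f"--input_tokens 512 --output_tokens 64 --summary {workdir}/summary.json",
]

for attempt in range(3):                      # assumed retry budget
    if subprocess.run(cmd).returncode == 0:
        break
    print(f"attempt {attempt + 1} failed, retrying in 30 seconds...")
    time.sleep(30)                            # assumed back-off
```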