-
Notifications
You must be signed in to change notification settings - Fork 3
/
llama31-api.py
69 lines (55 loc) · 2.06 KB
/
llama31-api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Interacting with the Llama 3.1 model hosted on Azure AI Model-as-a-Service directly via the API
import urllib.request
import json
import os
import ssl
from dotenv import load_dotenv
load_dotenv()
# Required connection settings, read from the process environment in a fixed
# order; a missing variable raises KeyError before any request is built.
endpoint, deployment, key = [
    os.environ[setting_name]
    for setting_name in ("LLAMA_API_ENDPOINT", "LLAMA_API_MODEL", "LLAMA_API_KEY")
]
def allowSelfSignedHttps(allowed):
    """Optionally disable HTTPS server-certificate verification process-wide.

    The default HTTPS context factory of the ``ssl`` module
    (``ssl._create_default_https_context``) is replaced with
    ``ssl._create_unverified_context`` only when all of the following hold:

    * ``allowed`` is truthy,
    * the ``PYTHONHTTPSVERIFY`` environment variable is unset or empty,
    * the running ``ssl`` module provides ``_create_unverified_context``.

    Otherwise nothing is changed.

    Args:
        allowed: Truthy to accept self-signed (unverifiable) certificates.

    Returns:
        None. The only effect is the assignment on the ``ssl`` module.
    """
    if not allowed:
        return
    # An explicit PYTHONHTTPSVERIFY setting takes precedence over this opt-out.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    # Older ssl modules may not expose the unverified-context factory.
    if not getattr(ssl, '_create_unverified_context', None):
        return
    # bypass the server certificate verification on client side
    ssl._create_default_https_context = ssl._create_unverified_context
# This line is needed if you use a self-signed certificate in your scoring service.
# NOTE(review): it switches off TLS certificate verification for every HTTPS
# request this process makes, including the one carrying the API key. Confirm
# the endpoint really presents a self-signed certificate; otherwise remove it.
allowSelfSignedHttps(True)

# Chat-completions request payload: a system prompt, one user turn, and the
# sampling parameters sent alongside them.
data = {
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful AI assistant.",
        },
        {
            "role": "user",
            "content": "Why is the sky blue?",
        },
    ],
    "model": deployment,
    "max_tokens": 128,
    "temperature": 0.8,
    "top_p": 0.1,
    "best_of": 1,
    "presence_penalty": 0,
    # Real booleans serialize to JSON `false`; the previous "false" strings
    # depended on the server coercing text into a boolean.
    "use_beam_search": False,
    "ignore_eos": False,
    "skip_special_tokens": False,
}
# Serialize the payload to UTF-8 encoded JSON for the request body.
body = json.dumps(data).encode("utf-8")
# Strip any trailing slash from the configured endpoint so the final URL never
# contains "//chat/completions".
url = endpoint.rstrip("/") + "/chat/completions"
# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
api_key = key
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")
headers = {'Content-Type': 'application/json', 'Authorization': ('Bearer ' + api_key)}
# Passing a body makes urllib send the request as a POST.
req = urllib.request.Request(url, body, headers)
try:
    # The context manager closes the HTTP response even if reading it fails.
    with urllib.request.urlopen(req) as http_response:
        result = http_response.read()
    response = json.loads(result.decode("utf-8"))
    # Print only the assistant's reply from the first returned choice.
    print(response['choices'][0]['message']['content'])
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))