Ray2333 committed on
Commit
7aed6ea
1 Parent(s): e867612

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -2
README.md CHANGED
@@ -37,11 +37,12 @@ We evaluate GRM 2B on the [reward model benchmark](https://huggingface.co/spaces
37
  import torch
38
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
39
 
 
40
  # load model and tokenizer
41
  tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-Gemma-2B-sftreg')
42
  reward_model = AutoModelForSequenceClassification.from_pretrained(
43
  'Ray2333/GRM-Gemma-2B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
44
- device_map=0,
45
  )
46
  message = [
47
  {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
@@ -54,7 +55,7 @@ kwargs = {"padding": 'max_length', "truncation": True, "return_tensors": "pt"}
54
  tokens = tokenizer.encode_plus(message_template, **kwargs)
55
 
56
  with torch.no_grad():
57
- _, _, reward_tensor = model(tokens["input_ids"][0].to(model.device), attention_mask=tokens["attention_mask"][0].to(model.device)).logits.reshape(-1)
58
  reward = reward_tensor.cpu().detach().item()
59
  ```
60
 
 
37
  import torch
38
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
39
 
40
+ device = 'cuda:2'
41
  # load model and tokenizer
42
  tokenizer = AutoTokenizer.from_pretrained('Ray2333/GRM-Gemma-2B-sftreg')
43
  reward_model = AutoModelForSequenceClassification.from_pretrained(
44
  'Ray2333/GRM-Gemma-2B-sftreg', torch_dtype=torch.float16, trust_remote_code=True,
45
+ device_map=device,
46
  )
47
  message = [
48
  {'role': 'user', 'content': "I'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?"},
 
55
  tokens = tokenizer.encode_plus(message_template, **kwargs)
56
 
57
  with torch.no_grad():
58
+ _, _, reward_tensor = reward_model(tokens["input_ids"][0].view(1,-1).to(device), attention_mask=tokens["attention_mask"][0].view(1,-1).to(device))
59
  reward = reward_tensor.cpu().detach().item()
60
  ```
61