# Requires: import time, torch; from transformers import AutoProcessor, AutoModelForVision2Seq
# Select the GPU if one is available, otherwise fall back to CPU
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the IDEFICS2-8B processor (tokenizer + image processor)
self.processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")

# Load the model in half precision and move it to the selected device
self.model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceM4/idefics2-8b",
    torch_dtype=torch.float16,
    # _attn_implementation="flash_attention_2",  # uncomment if flash-attn is installed
).to(self.device)

# to_time is the timestamp captured before loading started
print("Time taken to load model:", time.time() - to_time)