Upload anytext.py
Browse files- anytext.py +12 -12
anytext.py
CHANGED
|
@@ -233,8 +233,8 @@ class EmbeddingManager(ModelMixin, ConfigMixin):
|
|
| 233 |
|
| 234 |
@torch.no_grad()
|
| 235 |
def encode_text(self, text_info):
|
| 236 |
-
if self.get_recog_emb is None:
|
| 237 |
-
self.get_recog_emb = partial(get_recog_emb, self.recog)
|
| 238 |
|
| 239 |
gline_list = []
|
| 240 |
for i in range(len(text_info["n_lines"])): # sample index in a batch
|
|
@@ -243,7 +243,7 @@ class EmbeddingManager(ModelMixin, ConfigMixin):
|
|
| 243 |
gline_list += [text_info["gly_line"][j][i : i + 1]]
|
| 244 |
|
| 245 |
if len(gline_list) > 0:
|
| 246 |
-
recog_emb = self.get_recog_emb(gline_list)
|
| 247 |
enc_glyph = self.proj(recog_emb.reshape(recog_emb.shape[0], -1).to(self.proj.weight.dtype))
|
| 248 |
|
| 249 |
self.text_embs_all = []
|
|
@@ -688,7 +688,7 @@ class FrozenCLIPEmbedderT3(AbstractEncoder, ModelMixin, ConfigMixin):
|
|
| 688 |
batch_encoding = self.tokenizer(
|
| 689 |
text,
|
| 690 |
truncation=False,
|
| 691 |
-
max_length=self.max_length,
|
| 692 |
return_length=True,
|
| 693 |
return_overflowing_tokens=False,
|
| 694 |
padding="longest",
|
|
@@ -844,9 +844,9 @@ class TextEmbeddingModule(ModelMixin, ConfigMixin):
|
|
| 844 |
text = text[:max_chars]
|
| 845 |
gly_scale = 2
|
| 846 |
if pre_pos[i].mean() != 0:
|
| 847 |
-
gly_line = self.draw_glyph(self.font, text)
|
| 848 |
glyphs = self.draw_glyph2(
|
| 849 |
-
self.font, text, poly_list[i], scale=gly_scale, width=w, height=h, add_space=False
|
| 850 |
)
|
| 851 |
if revise_pos:
|
| 852 |
resize_gly = cv2.resize(glyphs, (pre_pos[i].shape[1], pre_pos[i].shape[0]))
|
|
@@ -888,7 +888,7 @@ class TextEmbeddingModule(ModelMixin, ConfigMixin):
|
|
| 888 |
def arr2tensor(self, arr, bs):
    """Convert a channel-last numpy array into a batched channel-first tensor.

    The array (presumably H x W x C — confirm against callers) is permuted
    to C x H x W, materialized as a CPU float tensor, optionally cast to
    half precision when ``self.use_fp16`` is set, and finally replicated
    ``bs`` times along a new leading batch dimension.
    """
    channel_first = np.transpose(arr, (2, 0, 1))
    sample = torch.from_numpy(channel_first.copy()).float().cpu()
    if self.use_fp16:
        sample = sample.half()
    # One identical copy per batch element.
    return torch.stack([sample] * bs, dim=0)
|
|
@@ -1095,12 +1095,12 @@ class AuxiliaryLatentModule(ModelMixin, ConfigMixin):
|
|
| 1095 |
# get masked_x
|
| 1096 |
masked_img = ((edit_image.astype(np.float32) / 127.5) - 1.0) * (1 - np_hint)
|
| 1097 |
masked_img = np.transpose(masked_img, (2, 0, 1))
|
| 1098 |
-
device = next(self.vae.parameters()).device
|
| 1099 |
-
dtype = next(self.vae.parameters()).dtype
|
| 1100 |
masked_img = torch.from_numpy(masked_img.copy()).float().to(device)
|
| 1101 |
if dtype == torch.float16:
|
| 1102 |
masked_img = masked_img.half()
|
| 1103 |
-
masked_x = (retrieve_latents(self.vae.encode(masked_img[None, ...])) * self.vae.config.scaling_factor).detach()
|
| 1104 |
if dtype == torch.float16:
|
| 1105 |
masked_x = masked_x.half()
|
| 1106 |
text_info["masked_x"] = torch.cat([masked_x for _ in range(num_images_per_prompt)], dim=0)
|
|
@@ -1275,7 +1275,7 @@ class AnyTextPipeline(
|
|
| 1275 |
):
|
| 1276 |
super().__init__()
|
| 1277 |
text_embedding_module = TextEmbeddingModule(
|
| 1278 |
-
font_path=font_path,
|
| 1279 |
use_fp16=unet.dtype == torch.float16,
|
| 1280 |
)
|
| 1281 |
auxiliary_latent_module = AuxiliaryLatentModule(
|
|
@@ -1321,7 +1321,7 @@ class AnyTextPipeline(
|
|
| 1321 |
self.control_image_processor = VaeImageProcessor(
|
| 1322 |
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
| 1323 |
)
|
| 1324 |
-
self.register_to_config(requires_safety_checker=requires_safety_checker
|
| 1325 |
|
| 1326 |
def modify_prompt(self, prompt):
|
| 1327 |
prompt = prompt.replace("“", '"')
|
|
|
|
| 233 |
|
| 234 |
@torch.no_grad()
|
| 235 |
def encode_text(self, text_info):
|
| 236 |
+
if self.config.get_recog_emb is None:
|
| 237 |
+
self.config.get_recog_emb = partial(get_recog_emb, self.recog)
|
| 238 |
|
| 239 |
gline_list = []
|
| 240 |
for i in range(len(text_info["n_lines"])): # sample index in a batch
|
|
|
|
| 243 |
gline_list += [text_info["gly_line"][j][i : i + 1]]
|
| 244 |
|
| 245 |
if len(gline_list) > 0:
|
| 246 |
+
recog_emb = self.config.get_recog_emb(gline_list)
|
| 247 |
enc_glyph = self.proj(recog_emb.reshape(recog_emb.shape[0], -1).to(self.proj.weight.dtype))
|
| 248 |
|
| 249 |
self.text_embs_all = []
|
|
|
|
| 688 |
batch_encoding = self.tokenizer(
|
| 689 |
text,
|
| 690 |
truncation=False,
|
| 691 |
+
max_length=self.config.max_length,
|
| 692 |
return_length=True,
|
| 693 |
return_overflowing_tokens=False,
|
| 694 |
padding="longest",
|
|
|
|
| 844 |
text = text[:max_chars]
|
| 845 |
gly_scale = 2
|
| 846 |
if pre_pos[i].mean() != 0:
|
| 847 |
+
gly_line = self.draw_glyph(self.config.font, text)
|
| 848 |
glyphs = self.draw_glyph2(
|
| 849 |
+
self.config.font, text, poly_list[i], scale=gly_scale, width=w, height=h, add_space=False
|
| 850 |
)
|
| 851 |
if revise_pos:
|
| 852 |
resize_gly = cv2.resize(glyphs, (pre_pos[i].shape[1], pre_pos[i].shape[0]))
|
|
|
|
| 888 |
def arr2tensor(self, arr, bs):
    """Tile a channel-last numpy array into a batch of channel-first tensors.

    Steps: permute the last axis to the front (C x H x W), build a CPU
    float tensor from a contiguous copy, downcast to fp16 when the
    ``self.config.use_fp16`` flag is enabled, then stack ``bs`` identical
    copies along a fresh batch axis.
    """
    chw = arr.transpose(2, 0, 1)
    tensor = torch.from_numpy(chw.copy()).float().cpu()
    if self.config.use_fp16:
        tensor = tensor.half()
    batched = torch.stack([tensor] * bs, dim=0)
    return batched
|
|
|
|
| 1095 |
# get masked_x
|
| 1096 |
masked_img = ((edit_image.astype(np.float32) / 127.5) - 1.0) * (1 - np_hint)
|
| 1097 |
masked_img = np.transpose(masked_img, (2, 0, 1))
|
| 1098 |
+
device = next(self.config.vae.parameters()).device
|
| 1099 |
+
dtype = next(self.config.vae.parameters()).dtype
|
| 1100 |
masked_img = torch.from_numpy(masked_img.copy()).float().to(device)
|
| 1101 |
if dtype == torch.float16:
|
| 1102 |
masked_img = masked_img.half()
|
| 1103 |
+
masked_x = (retrieve_latents(self.config.vae.encode(masked_img[None, ...])) * self.vae.config.scaling_factor).detach()
|
| 1104 |
if dtype == torch.float16:
|
| 1105 |
masked_x = masked_x.half()
|
| 1106 |
text_info["masked_x"] = torch.cat([masked_x for _ in range(num_images_per_prompt)], dim=0)
|
|
|
|
| 1275 |
):
|
| 1276 |
super().__init__()
|
| 1277 |
text_embedding_module = TextEmbeddingModule(
|
| 1278 |
+
font_path=self.config.font_path,
|
| 1279 |
use_fp16=unet.dtype == torch.float16,
|
| 1280 |
)
|
| 1281 |
auxiliary_latent_module = AuxiliaryLatentModule(
|
|
|
|
| 1321 |
self.control_image_processor = VaeImageProcessor(
|
| 1322 |
vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
|
| 1323 |
)
|
| 1324 |
+
self.register_to_config(requires_safety_checker=requires_safety_checker)#, font_path=font_path)
|
| 1325 |
|
| 1326 |
def modify_prompt(self, prompt):
|
| 1327 |
prompt = prompt.replace("“", '"')
|