139 changes: 139 additions & 0 deletions Identify-Interactable/screenrecognition/ui_dataset_khan.py
@@ -0,0 +1,139 @@
import os
import json
from PIL import Image
import torch
import torch.nn.functional as F
from torchvision import transforms
import pytorch_lightning as pl


class KhanUIDataset(torch.utils.data.Dataset):
def __init__(
self,
root="../../downloads/khan/screenshots",
class_dict_path="../../metadata/screenrecognition/class_map_khan_manual.json",
id_list_path="../../metadata/screenrecognition/train_ids_khan.json",
):
with open(id_list_path, "r") as f:
self.id_list = json.load(f)

self.root = root
self.img_transforms = transforms.ToTensor()

with open(class_dict_path, "r") as f:
class_dict = json.load(f)

self.idx2Label = class_dict["idx2Label"]
self.label2Idx = class_dict["label2Idx"]

def __len__(self):
return len(self.id_list)

    def __getitem__(self, idx):
        def return_next():  # debugging fallback: skip to the next sample on failure
            # call KhanUIDataset.__getitem__ directly so a subclass's label
            # transform isn't applied twice; wrap around to avoid an IndexError
            return KhanUIDataset.__getitem__(self, (idx + 1) % len(self))

try:
img_path = os.path.join(self.root, self.id_list[idx])

pil_img = Image.open(img_path).convert("RGB")
img = self.img_transforms(pil_img)

            # load the annotations dictionary with bboxes; the JSON sits next to
            # the screenshot, under "annotations" instead of "screenshots"
            with open(
                img_path.replace(".jpg", ".json").replace("screenshots", "annotations"),
                "r",
            ) as root_file:
                annotations = json.load(root_file)

            # bounding-box coordinates for each element, as [x1, y1, x2, y2] lists
            boxes = annotations["clickable"]
            labels = [self.label2Idx["clickable"]] * len(boxes)

# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.long)
image_id = torch.tensor([idx])
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
            # assume no instance is marked as crowd (COCO convention):
            # iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

target = {}
target["boxes"] = boxes
target["labels"] = labels
target["image_id"] = image_id
target["area"] = area

return img, target

except Exception as e:
print("failed", idx, self.id_list[idx], str(e))
return return_next()


class KhanUIOneHotLabelDataset(KhanUIDataset):
def __getitem__(self, idx):
        img, res_dict = super().__getitem__(idx)
        num_classes = 2  # background + "clickable"
        one_hot_labels = F.one_hot(res_dict["labels"], num_classes=num_classes)
res_dict["labels"] = one_hot_labels
return img, res_dict
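
For reference, F.one_hot expands integer class indices into one-hot rows; a minimal sketch of what the wrapper produces, assuming label2Idx maps "clickable" to index 1:

labels = torch.tensor([1, 1, 1])    # three boxes, all class index 1 ("clickable", assumed)
F.one_hot(labels, num_classes=2)    # -> tensor([[0, 1], [0, 1], [0, 1]])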


# https://github.com/pytorch/vision/blob/5985504cc32011fbd4312600b4492d8ae0dd13b4/references/detection/utils.py#L203
def collate_fn(batch):
return tuple(zip(*batch))
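
Detection targets differ in size per image, so the default collate (which stacks tensors into one batch tensor) would fail; transposing with tuple(zip(*batch)) keeps images and targets as parallel tuples instead. A tiny illustration with placeholder values:

# the batch as the DataLoader sees it: a list of (image, target) pairs
batch = [("img0", {"n_boxes": 2}), ("img1", {"n_boxes": 5})]
images, targets = collate_fn(batch)
# images  == ("img0", "img1")
# targets == ({"n_boxes": 2}, {"n_boxes": 5})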


class KhanUIDataModule(pl.LightningDataModule):
def __init__(self, batch_size=32, num_workers=2, one_hot_labels=True):
super(KhanUIDataModule, self).__init__()
self.batch_size = batch_size
self.num_workers = num_workers

if one_hot_labels:
self.train_dataset = KhanUIOneHotLabelDataset(
id_list_path="../../metadata/screenrecognition/train_ids_khan.json"
)
self.val_dataset = KhanUIOneHotLabelDataset(
id_list_path="../../metadata/screenrecognition/val_ids_khan.json"
)
self.test_dataset = KhanUIOneHotLabelDataset(
id_list_path="../../metadata/screenrecognition/test_ids_khan.json"
)
else:
self.train_dataset = KhanUIDataset(
id_list_path="../../metadata/screenrecognition/train_ids_khan.json"
)
self.val_dataset = KhanUIDataset(
id_list_path="../../metadata/screenrecognition/val_ids_khan.json"
)
self.test_dataset = KhanUIDataset(
id_list_path="../../metadata/screenrecognition/test_ids_khan.json"
)

def train_dataloader(self):
return torch.utils.data.DataLoader(
self.train_dataset,
collate_fn=collate_fn,
num_workers=self.num_workers,
batch_size=self.batch_size,
shuffle=True,
)

def val_dataloader(self):
return torch.utils.data.DataLoader(
self.val_dataset,
collate_fn=collate_fn,
num_workers=self.num_workers,
batch_size=self.batch_size,
)

def test_dataloader(self):
return torch.utils.data.DataLoader(
self.test_dataset,
collate_fn=collate_fn,
num_workers=self.num_workers,
batch_size=self.batch_size,
drop_last=True,
)
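
A minimal usage sketch for the module above, assuming the Khan screenshots, annotations, and ID lists exist at the default relative paths:

if __name__ == "__main__":
    dm = KhanUIDataModule(batch_size=4, num_workers=0)
    images, targets = next(iter(dm.train_dataloader()))
    # images is a tuple of 4 CHW tensors; targets is a tuple of 4 dicts
    print(len(images), targets[0]["boxes"].shape, targets[0]["labels"].shape)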
@@ -0,0 +1,5 @@
from argparse import ArgumentParser

parser = ArgumentParser()

args = parser.parse_args()
211 changes: 211 additions & 0 deletions WebUI Paper Code Repo/webui-main/webui-main/downloads/downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import os
import gdown
import subprocess
import glob
import shutil
import urllib.request

DATASET_GDRIVE_URLS = {
"webui-all": "https://drive.google.com/drive/folders/1IGOCYjwY5wp3ZNEhxyN5bLEEJ-8M8kHg?usp=share_link",
"webui-val": "https://drive.google.com/drive/folders/1ntEYc-VSvFOGmbgiRasALwDGzwCGnWjH?usp=share_link",
"webui-test": "https://drive.google.com/drive/folders/1agq6S_-lyjXotPxDZVOvT78aoezGYEke?usp=share_link",
"webui-7k": "https://drive.google.com/drive/folders/1AWj8yYMPiG--UPARdJoXT4j7J-N0VLfU?usp=share_link",
"webui-7k-balanced": "https://drive.google.com/drive/folders/1F8W7OoMnpFGFHMK8m01r8zXb5765AB-N?usp=share_link",
"webui-70k": "https://drive.google.com/drive/folders/1_srKdxB9Gjl02p-cEpBQ7OO2Q65I9whG?usp=share_link",
"webui-350k": "https://drive.google.com/drive/folders/1yCEHzeWx33t6DsFt889SRnFoqtl-vTgu?usp=share_link"
}

METADATA_GDRIVE_URLS = {
"screenclassification": "https://drive.google.com/drive/folders/17AHRTUQ99UbU6ugumFROpRQ-49BKtGat?usp=share_link",
"screenrecognition": "https://drive.google.com/drive/folders/1jOJF2nm5F98nCn3alSR3AZhOTjuYbvDA?usp=share_link",
"screensim": "https://drive.google.com/drive/folders/1mHyB7Y8tHhB5aNs-vMxaSV0ANPJmwegq?usp=share_link"
}

DATASET_RICO_URL = "https://storage.googleapis.com/crowdstf-rico-uiuc-4540/rico_dataset_v0.1/unique_uis.tar.gz"

DATASET_ENRICO_URL = "http://userinterfaces.aalto.fi/enrico/resources/screenshots.zip"
METADATA_ENRICO_URL = "https://raw.githubusercontent.com/luileito/enrico/master/design_topics.csv"

DATASET_VINS_URL = "https://drive.google.com/file/d/1ucti2ujNJgKV-Ts_mXALA7BZ97iPv8h6/view"

DATASET_BOXES_URL = "https://drive.google.com/file/d/1FBbMaDmwipEXsKBODCaqrAn9X58Lwz83/view?usp=share_link"

MODEL_GDRIVE_URLS = {
"screenclassification": {
"screenclassification-resnet-baseline.ckpt": "https://drive.google.com/file/d/1uBZMa5Z1lXiGGf5i4JHdhIck5gJjw22K/view?usp=share_link",
"screenclassification-resnet-noisystudent+rico.ckpt": "https://drive.google.com/file/d/1olTBXNN4bzj32LYz06Rd2gPj4LTv079H/view?usp=share_link",
"screenclassification-resnet-noisystudent+web7k.ckpt": "https://drive.google.com/file/d/1c9Ow1sm8tQwGmLlXX-_4rWn6opklIuR6/view?usp=share_link",
"screenclassification-resnet-noisystudent+web7kbal.ckpt": "https://drive.google.com/file/d/1bKuieDZplPBWSLJc7NAkZZG3pP7DouQp/view?usp=share_link",
"screenclassification-resnet-noisystudent+web70k.ckpt": "https://drive.google.com/file/d/1Mfksb3Rnp2GhCnTHLveXw4v43vpDivFd/view?usp=share_link",
"screenclassification-resnet-noisystudent+web350k.ckpt": "https://drive.google.com/file/d/1jAVpeXV46veDq2L4RJMX8uLTim23Egl_/view?usp=share_link",
"screenclassification-resnet-randaugment.ckpt": "https://drive.google.com/file/d/1vFm9e20GORqM5Bhxn-6F9LCZCzxEG5n5/view?usp=share_link",
"screenclassification-vgg16-baseline.ckpt": "https://drive.google.com/file/d/13hZRlAGW9OErdjMQnBjsMklv2vyZR535/view?usp=share_link",
},
"screenrecognition": {
"screenrecognition-ssd-vins.ckpt": "https://drive.google.com/file/d/1bu3wL2PH6AHgg5-7YkEidsBLaPbs22kc/view?usp=share_link",
"screenrecognition-vins.ckpt": "https://drive.google.com/file/d/10Id643ldFjOeOtnY2cGCtKOEDGMp_BHV/view?usp=share_link",
"screenrecognition-web7k-vins.ckpt": "https://drive.google.com/file/d/1M3uoxLKncwf0WHLbEhbCoOytDjQT2gU9/view?usp=share_link",
"screenrecognition-web7k.ckpt": "https://drive.google.com/file/d/1DfIz1geicHYNq3_UdT10oSjLCDgkE72t/view?usp=share_link",
"screenrecognition-web7kbal-vins.ckpt": "https://drive.google.com/file/d/10Gb77oBa7HmQwcR2vLVTdNdoUPagEPyy/view?usp=share_link",
"screenrecognition-web7kbal.ckpt": "https://drive.google.com/file/d/1-0TrGpDaQMrDK2Wf8A-7pnrgHnJXmdiz/view?usp=share_link",
"screenrecognition-web70k-vins.ckpt": "https://drive.google.com/file/d/1BsOa3e9T3_HM5rGPY70K9Z4FuBIrKqVs/view?usp=share_link",
"screenrecognition-web70k.ckpt": "https://drive.google.com/file/d/1yeCFHIfLl7taSAoYYCuECwaCZmLToKlI/view?usp=share_link",
"screenrecognition-web350k-vins.ckpt": "https://drive.google.com/file/d/14BjYnwyWhHK8APpWLHj9J7SgoHBLjrMb/view?usp=share_link",
"screenrecognition-web350k.ckpt": "https://drive.google.com/file/d/1SjU-yjhBXdImCmSf251EWceAH-QAef_N/view?usp=share_link",
},
"screensim": {
"screensim-resnet-uda+web7k.ckpt": "https://drive.google.com/file/d/16fRllQ80tYuiFoSlrpnDOAsmD5W-sWPb/view?usp=share_link",
"screensim-resnet-web7k.ckpt": "https://drive.google.com/file/d/1uxpcGHvceYYTxxj98bBme2QXkmgXGiQg/view?usp=share_link",
"screensim-resnet-uda+web7kbal.ckpt": "https://drive.google.com/file/d/1CareIltu1GgKINm9XNjfUwKA3YBniklY/view?usp=share_link",
"screensim-resnet-web7kbal.ckpt": "https://drive.google.com/file/d/133tv6-nFdm78ngn4DVMbJ0W8QEpS__aB/view?usp=share_link",
"screensim-resnet-uda+web70k.ckpt": "https://drive.google.com/file/d/1taNDFSIUP1ThsWpkeb0Vd4V8I4exf0qM/view?usp=share_link",
"screensim-resnet-web70k.ckpt": "https://drive.google.com/file/d/1oj32qKLVOZdwFtqosht2tWaaVA84uXA-/view?usp=share_link",
"screensim-resnet-uda+web350k.ckpt": "https://drive.google.com/file/d/1WCofe3JUDT_AJNVLXjVxWsBurLe0wcjQ/view?usp=share_link",
"screensim-resnet-web350k.ckpt": "https://drive.google.com/file/d/1vP7-YHkcz9BqfmKhpd_F_LlgzYENTGJG/view?usp=share_link",
}
}

# path to the 7-Zip executable (Windows); adjust for other platforms
PATH_7z = r"C:\Program Files\7-Zip\7z.exe"

def download_rico(tmp_path="tmp", dataset_path="rico"):
if not os.path.exists(tmp_path):
os.makedirs(tmp_path)

output_path = os.path.join(tmp_path, "unique_uis.tar.gz")
urllib.request.urlretrieve(DATASET_RICO_URL, output_path)

extract_path = os.path.join(tmp_path, "extract")

    cmd = [PATH_7z, 'x', output_path, '-o' + tmp_path]
    subprocess.run(cmd, check=True)

    cmd = [PATH_7z, 'x', os.path.join(tmp_path, "unique_uis.tar"), "-o" + extract_path]
    subprocess.run(cmd, check=True)

if not os.path.exists(dataset_path):
os.makedirs(dataset_path)

os.rename(os.path.join(extract_path, "combined"), os.path.join(dataset_path, "combined"))
shutil.rmtree(tmp_path)

def download_vins(tmp_path="tmp", dataset_path="vins"):
if not os.path.exists(tmp_path):
os.makedirs(tmp_path)

gdown.download(DATASET_VINS_URL, output=os.path.join(tmp_path, "VINS Dataset.zip"), fuzzy=True, use_cookies=False)

extract_path = os.path.join(tmp_path, "extract")
    cmd = [PATH_7z, 'x', os.path.join(tmp_path, "VINS Dataset.zip"), '-o' + extract_path]
    subprocess.run(cmd, check=True)
os.rename(extract_path, dataset_path)
shutil.rmtree(tmp_path)

def download_boxes_gdown(tmp_path="tmp", dataset_path="webui-boxes"):
if not os.path.exists(tmp_path):
os.makedirs(tmp_path)

gdown.download(DATASET_BOXES_URL, output=os.path.join(tmp_path, "all_boxes.zip"), fuzzy=True, use_cookies=False)

extract_path = os.path.join(tmp_path, "extract")
    cmd = [PATH_7z, 'x', os.path.join(tmp_path, "all_boxes.zip"), '-o' + extract_path]
    subprocess.run(cmd, check=True)
os.rename(extract_path, dataset_path)
shutil.rmtree(tmp_path)

def download_enrico(tmp_path="tmp", dataset_path="enrico", screenclassification_metadata_path="../metadata/screenclassification"):
if not os.path.exists(tmp_path):
os.makedirs(tmp_path)

output_path = os.path.join(tmp_path, "screenshots.zip")
urllib.request.urlretrieve(DATASET_ENRICO_URL, output_path)

extract_path = os.path.join(tmp_path, "extract")
    cmd = [PATH_7z, 'x', output_path, '-o' + extract_path]
    subprocess.run(cmd, check=True)

if not os.path.exists(dataset_path):
os.makedirs(dataset_path)

os.rename(os.path.join(extract_path, "screenshots"), os.path.join(dataset_path, "screenshots"))
shutil.rmtree(tmp_path)

if not os.path.exists(screenclassification_metadata_path):
os.makedirs(screenclassification_metadata_path)

metadata_output_path = os.path.join(screenclassification_metadata_path, "design_topics.csv")
urllib.request.urlretrieve(METADATA_ENRICO_URL, metadata_output_path)


def download_metadata_gdown(metadata_key, metadata_path="../metadata"):
if not os.path.exists(metadata_path):
os.makedirs(metadata_path)

gdown.download_folder(METADATA_GDRIVE_URLS[metadata_key], output=os.path.join(metadata_path, metadata_key), use_cookies=False)


def download_dataset_gdown(dataset_key, tmp_path="tmp", dataset_path="ds"):
if not os.path.exists(tmp_path):
os.makedirs(tmp_path)

if not os.path.exists(os.path.join(tmp_path, dataset_key)):
gdown.download_folder(DATASET_GDRIVE_URLS[dataset_key], output=os.path.join(tmp_path, dataset_key), use_cookies=False)

    extract_file = glob.glob(os.path.join(tmp_path, dataset_key, "*.zip.001"))[0]
    split_json_file = glob.glob(os.path.join(tmp_path, dataset_key, "*.json"))[0]
    if not os.path.exists(os.path.basename(split_json_file)):
        shutil.move(split_json_file, ".")

extract_path = os.path.join(tmp_path, "extract")
if not os.path.exists(extract_path):
os.makedirs(extract_path)

    cmd = [PATH_7z, 'x', extract_file, "-o" + extract_path]

    subprocess.run(cmd, check=True)

if not os.path.exists(dataset_path):
os.makedirs(dataset_path)

    dataset_ids = glob.glob(os.path.join(extract_path, "*", "*"))

for folder in dataset_ids:
if not os.path.exists(os.path.join(dataset_path, os.path.basename(folder))):
os.rename(folder, os.path.join(dataset_path, os.path.basename(folder)))

# delete the tmp path
shutil.rmtree(tmp_path)

def download_model_gdown(model_name, model_key, model_path="checkpoints"):
if not os.path.exists(model_path):
os.makedirs(model_path)

gdown.download(MODEL_GDRIVE_URLS[model_name][model_key], output=os.path.join(model_path, model_key), fuzzy=True, use_cookies=False)

if __name__ == "__main__":
    # download_model_gdown("screenclassification", "screenclassification-resnet-noisystudent+web350k.ckpt")
    # download_enrico()
    # download_dataset_gdown("webui-7k-balanced")
    # download_dataset_gdown("webui-7k")
    # download_metadata_gdown("screenclassification")
    # download_vins()
    # download_metadata_gdown("screenrecognition")
    # download_boxes_gdown(dataset_path='webui-boxes')
    # download_dataset_gdown("webui-val")
    download_dataset_gdown("webui-test")
    # download_model_gdown("screenrecognition", "screenrecognition-web350k-vins.ckpt")
    # download_metadata_gdown("screensim")
    # download_rico()
@@ -0,0 +1,33 @@

import json, math
import numpy as np

def rand_train_split_Manual():

ALL_FILE = 'train_split_web7k.json'
TRAIN_FILE = 'train_split_web7k_Manual.json'
VAL_FILE = 'val_split_web7k_Manual.json'
TEST_FILE = 'test_split_web7k_Manual.json'

with open(ALL_FILE, 'r') as fp:
all_data = np.array(json.load(fp))

    # 7,000 IDs total -> 90% train, 5% val, 5% test
    perm = np.random.permutation(all_data)

    val_break = math.floor(len(all_data) * 0.9)
    test_break = math.floor(len(all_data) * 0.95)

    # .tolist() converts NumPy scalars back to native Python types so json.dump works
    train = perm[0:val_break].tolist()
    val = perm[val_break:test_break].tolist()
    test = perm[test_break:].tolist()

with open(TRAIN_FILE, 'w') as fp:
json.dump(train, fp)
with open(VAL_FILE, 'w') as fp:
json.dump(val, fp)
with open(TEST_FILE, 'w') as fp:
json.dump(test, fp)

if __name__ == '__main__':
rand_train_split_Manual()
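
Note that the split above draws from NumPy's global RNG, so every run produces a different partition. A minimal sketch of a seeded variant (a hypothetical helper, not part of the original script), assuming the same input file:

def rand_train_split_seeded(seed=0):
    # same 90/5/5 split, but with a fixed seed so the partition can be
    # regenerated exactly across runs
    with open('train_split_web7k.json', 'r') as fp:
        all_data = np.array(json.load(fp))

    rng = np.random.default_rng(seed)
    perm = rng.permutation(all_data)

    val_break = math.floor(len(all_data) * 0.9)
    test_break = math.floor(len(all_data) * 0.95)
    return (perm[:val_break].tolist(),
            perm[val_break:test_break].tolist(),
            perm[test_break:].tolist())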