etetoolkit · yudalang3 · Aug 4, 2025 · Aug 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,9 @@ test.svg
 
 # MacOSX
 .DS_Store
+
+
+# IDEs and editors
+.idea/
+
+user_individual_proj/
diff --git a/ete4/__init__.py b/ete4/__init__.py
@@ -23,3 +23,5 @@
 from .utils import SVG_COLORS, COLOR_SCHEMES, random_color
 
 from .version import __version__
+
+from .py4egps_treeview import *
diff --git a/ete4/config.py b/ete4/config.py
@@ -1,54 +1,92 @@
 """
-Constants with the XDG-compliant directories for ete.
+ETE Toolkit XDG-compliant directory constants configuration file.
+
+This file defines the standard paths for ETE to store data, configuration, and cache on different operating systems.
+It follows the XDG Base Directory specification to ensure compatibility across different systems.
 """
 
 # See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
+# The XDG Base Directory specification defines which standard directories applications should use 
+# to store different types of data
 
 import os
+# Import dirname and exists functions from os.path module for path handling and file existence checking
 from os.path import dirname, exists
+# Import requests library for HTTP requests to download files
 import requests
 
 
-# Helper function to define global ETE_* variables.
+# Helper function: used to define global ETE_* variables
+# Parameters:
+#   xdg_var: XDG environment variable name (e.g., XDG_DATA_HOME)
+#   default: Default path (relative to user home directory) when environment variable is not set
 def ete_path(xdg_var, default):
-    return os.environ.get(xdg_var, os.environ['HOME'] + default) + '/ete'
+    """Return the ETE directory derived from an XDG base-directory variable.
+
+    Uses the value of the environment variable ``xdg_var`` when it is set and
+    non-empty; otherwise falls back to the user's home directory plus
+    ``default``. '/ete' is appended to the chosen base. The directory is
+    only named here, it is not created.
+    """
+    # expanduser('~') does not require the HOME variable to be defined.
+    prefix = os.path.expanduser('~') + default
+    # The XDG spec treats an empty variable like an unset one, hence `or`.
+    return (os.environ.get(xdg_var) or prefix) + '/ete'
 
+# Define ETE data storage directory (for database and other data files)
 ETE_DATA_HOME   = ete_path('XDG_DATA_HOME',   '/.local/share')
+# Define ETE configuration file directory (for configuration files)
 ETE_CONFIG_HOME = ete_path('XDG_CONFIG_HOME', '/.config')
+# Define ETE cache directory (for temporary files and cache)
 ETE_CACHE_HOME  = ete_path('XDG_CACHE_HOME',  '/.cache')
 
 
 def update_ete_data(path, url, overwrite=False):
-    """Refresh the contents of path with the ones in the given in the url."""
-    # Resolve relative paths to refer to ETE_DATA_HOME.
+    """
+    Download data from the given URL into a local file.
+
+    Parameters:
+      path: Local file path; relative paths are resolved under ETE_DATA_HOME
+      url: Data source URL; relative URLs are resolved against the ete-data repository
+      overwrite: Whether to overwrite an existing file, default is False (no overwrite)
+    """
+    # Relative paths are resolved under ETE_DATA_HOME. os.path.isabs is used instead of
+    # a leading-'/' test so that absolute Windows paths are recognized too.
-    if not path.startswith('/'):
+    if not os.path.isabs(path):
         path = ETE_DATA_HOME + '/' + path
 
-    # Keep existing file if we asked for it.
+    # Keep an existing file unless overwriting was requested.
     if not overwrite and exists(path):
         return
 
-    # Create the directory.
+    # Create the parent directory if the path has one and it does not exist yet.
     if dirname(path) and not exists(dirname(path)):
-        os.system('mkdir -p ' + dirname(path))
+        # os.makedirs needs no shell: portable, and safe for paths with spaces or metacharacters.
+        os.makedirs(dirname(path), exist_ok=True)
 
-    # Resolve relative urls to refer to ete-data repository.
+    # URLs that do not start with 'https://' are treated as paths inside the
+    # ete-data repository.
     if not url.startswith('https://'):
-        url = 'https://github.com/etetoolkit/ete-data/raw/main/' + url
+        url = 'https://github.com/etetoolkit/ete-data/raw/refs/heads/main/' + url
 
-    # Update local file with the content from the url.
-    with open(path, 'wb') as f:
-        print(f'{url} -> {path}')
-        f.write(requests.get(url).content)
-    # NOTE: If we had wget, this is similar to:
+    # Fetch the content before opening the file, so that a failed request cannot leave
+    # behind an empty file that later calls (with overwrite=False) would keep.
+    print(f'{url} -> {path}')
+    content = requests.get(url).content
+    with open(path, 'wb') as f:
+        f.write(content)
+    # NOTE: With wget available, this would be similar to the following:
     #   os.system(f'wget -c -nv -O {path} {url}')
-    # only wget could be better since it resumes partially downloaded files.
+    # except that wget can also resume partially downloaded files.
 
 
-# Example:
+# Usage example:
 #
+#   # Define the file name to download
 #   path = 'gtdb202dump.tar.gz'
-#   url = ('https://github.com/etetoolkit/ete-data/raw/main'
+#   # Define the complete URL of the file on GitHub
+#   url = ('https://github.com/etetoolkit/ete-data/raw/refs/heads/main'
 #          '/gtdb_taxonomy/gtdb202/gtdb202dump.tar.gz')
 #
-#   update_ete_data(path, url)
+#   # Call function to update data
+#   update_ete_data(path, url)
diff --git a/ete4/core/tree.pyx b/ete4/core/tree.pyx
@@ -707,7 +707,7 @@ cdef class Tree:
         """Return or write to file the newick representation.
 
         :param str outfile: Name of the output file. If present, it will write
-            the newick to that file instad of returning it as a string.
+            the newick to that file instead of returning it as a string.
         :param list props: Properties to write for all nodes using the Extended
             Newick Format. If None, write all available properties.
         :param parser: Parser used to encode the tree in newick format.

diff --git a/ete4/py4egps_treeview/ExternalCmdInvoker.py b/ete4/py4egps_treeview/ExternalCmdInvoker.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python3
+"""
+ExternalCmdInvoker
+~~~~~~~~~~~~~~~~~~
+Cross-platform external command execution helper.
+This module provides functionality to execute external commands in a cross-platform manner,
+handling differences between Windows and Unix-like systems appropriately.
+
+Example usage:
+    >>> from ete4.py4egps_treeview import ExternalCmdInvoker
+    >>> invoker = ExternalCmdInvoker()
+    >>> out, err, code = invoker.run_cmd(["java", "-jar", "MyTool.jar", "--help"])
+"""
+import platform
+import subprocess
+import shlex
+from typing import List, Tuple, Optional, Dict
+
+
+class ExternalCmdInvoker:
+    """
+    Thin wrapper around subprocess.run for launching external commands.
+
+    Every command is printed with a "[RUN] " prefix before it is started, and
+    each method returns the tuple (stdout, stderr, return_code) of the finished
+    process.
+
+    The class provides two main methods:
+    1. run_cmd() - Runs an executable directly from an argument list (no shell)
+    2. run_shell_cmd() - Runs a command string through the system shell, which
+       is required for shell builtins (e.g., dir on Windows cmd, cd on Unix)
+    """
+
+    def _quote(self, args: List[str]) -> str:
+        """
+        Convert an argument list to a command line string for the current OS.
+
+        Used only to log a copy-pasteable form of the command; the string is
+        never passed to the shell for execution.
+
+        Args:
+            args: List of command arguments
+
+        Returns:
+            The arguments joined with Windows quoting rules (list2cmdline) on
+            Windows, and with POSIX shell quoting (shlex.join) elsewhere
+        """
+        if platform.system() == "Windows":
+            # list2cmdline implements the quoting rules that subprocess itself
+            # applies to argument lists on Windows.
+            return subprocess.list2cmdline(args)
+        # Linux / Darwin (macOS); shlex.join requires Python 3.8+
+        return shlex.join(args)
+
+    def run_cmd(
+            self,
+            argv: List[str],
+            cwd: Optional[str] = None,
+            env: Optional[Dict[str, str]] = None,
+            check: bool = False,
+            capture_output: bool = True,
+            text: bool = True,
+    ) -> Tuple[str, str, int]:
+        """
+        Execute an external program directly (without a shell) and wait for it to finish.
+
+        This is a wrapper around subprocess.run: the command line is logged first,
+        then argv, cwd, env, check, capture_output and text are passed through unchanged.
+
+        NOTE: Because no shell is involved, shell builtins (e.g. 'dir' on Windows cmd,
+        'cd' on Unix shells) and shell syntax such as pipes or redirections are not
+        available here. Use run_shell_cmd() for those.
+
+        Args:
+            argv : List[str]
+                Command and its arguments as a list, e.g. ["java", "-jar", "/path/to/app.jar", "--flag", "value"]
+                The first element must name an executable program.
+            cwd : str | None
+                Working directory for the command execution; if None, inherits the parent process directory
+            env : dict | None
+                Complete environment for the child process (it replaces the parent's, it is not merged);
+                if None, inherits the parent process environment
+            check : bool
+                If True, raises a CalledProcessError when the command returns a non-zero exit code
+            capture_output : bool
+                If True, captures and returns stdout and stderr; if False, they are not captured
+            text : bool
+                If True, returns output as strings; if False, returns output as bytes
+
+        Returns:
+            A tuple containing (stdout, stderr, return_code); stdout and stderr are None
+            when capture_output is False
+
+        Example:
+            >>> invoker = ExternalCmdInvoker()
+            >>> out, err, code = invoker.run_cmd(["python", "--version"])
+            >>> print(f"Command exited with code: {code}")
+
+        Raises:
+            subprocess.CalledProcessError: If check is True and the exit code is non-zero
+            FileNotFoundError: If the program named by argv[0] cannot be found
+        """
+        cmd_str = self._quote(argv)
+        print(f"[RUN] {cmd_str}")  # Log the copy-pasteable full command line
+
+        completed = subprocess.run(
+            argv,
+            cwd=cwd,
+            env=env,
+            check=check,
+            capture_output=capture_output,
+            text=text,
+        )
+        return completed.stdout, completed.stderr, completed.returncode
+
+    def run_shell_cmd(
+            self,
+            cmd: str,
+            cwd: Optional[str] = None,
+            env: Optional[Dict[str, str]] = None,
+            check: bool = False,
+            capture_output: bool = True,
+            text: bool = True,
+    ) -> Tuple[str, str, int]:
+        """
+        Execute a command string through the system shell and wait for it to finish.
+
+        This is a wrapper around subprocess.run with shell=True. With shell=True the
+        command is run by /bin/sh on POSIX and by the shell named in COMSPEC (normally
+        cmd.exe) on Windows, so shell builtins and shell syntax are available.
+
+        SECURITY: cmd is interpreted by the shell. Never build it from untrusted input;
+        prefer run_cmd() with an argument list whenever no shell feature is needed.
+
+        Args:
+            cmd : str
+                The command string to execute through the shell, e.g. "dir" or "ls -l"
+            cwd : str | None
+                Working directory for the command execution; if None, inherits the parent process directory
+            env : dict | None
+                Complete environment for the child process (it replaces the parent's, it is not merged);
+                if None, inherits the parent process environment
+            check : bool
+                If True, raises a CalledProcessError when the command returns a non-zero exit code
+            capture_output : bool
+                If True, captures and returns stdout and stderr; if False, they are not captured
+            text : bool
+                If True, returns output as strings; if False, returns output as bytes
+
+        Returns:
+            A tuple containing (stdout, stderr, return_code); stdout and stderr are None
+            when capture_output is False
+
+        Example:
+            >>> invoker = ExternalCmdInvoker()
+            >>> # On Windows (cmd builtin)
+            >>> out, err, code = invoker.run_shell_cmd("dir")
+            >>>
+            >>> # On Unix/Linux/macOS
+            >>> out, err, code = invoker.run_shell_cmd("ls -l")
+            >>> print(f"Command exited with code: {code}")
+
+        Note:
+            - State changed by builtins such as 'cd' or 'export' only lives inside the
+              child shell; it does not affect the Python process or later calls.
+            - PowerShell cmdlets and aliases (e.g. 'ls' on Windows) are not available,
+              because subprocess does not use PowerShell as its shell.
+        """
+        print(f"[RUN] {cmd}")  # Log the command
+
+        completed = subprocess.run(
+            cmd,
+            shell=True,
+            cwd=cwd,
+            env=env,
+            check=check,
+            capture_output=capture_output,
+            text=text,
+        )
+        return completed.stdout, completed.stderr, completed.returncode
+
+
+# For backward compatibility, expose the old function-based interface
+def run_cmd(
+        argv: List[str],
+        cwd: Optional[str] = None,
+        env: Optional[Dict[str, str]] = None,
+        check: bool = False,
+        capture_output: bool = True,
+        text: bool = True,
+) -> Tuple[str, str, int]:
+    """
+    Module-level shortcut for ExternalCmdInvoker.run_cmd().
+
+    Creates a throwaway ExternalCmdInvoker and forwards all arguments to its
+    run_cmd() method, returning the same (stdout, stderr, return_code) tuple.
+    The program is started directly, without a shell, so shell builtins such as
+    'dir' on Windows are unavailable; use ExternalCmdInvoker.run_shell_cmd() for those.
+    """
+    return ExternalCmdInvoker().run_cmd(
+        argv, cwd=cwd, env=env, check=check, capture_output=capture_output, text=text)
+
+
+# —————— Simple Example ——————
+if __name__ == "__main__":
+    # Example: java -jar demo.jar --input sample.txt
+    # An argument list must go through run_cmd(); run_shell_cmd() expects a single string.
+    invoker = ExternalCmdInvoker()
+    out, err, code = invoker.run_cmd(
+        ["java", "-jar", "demo.jar", "--input", "sample.txt"], check=False
+    )
+    print("exit code:", code)
+    if out:
+        print("stdout:\n", out)
+    if err:
+        print("stderr:\n", err)
diff --git a/ete4/py4egps_treeview/__init__.py b/ete4/py4egps_treeview/__init__.py
@@ -0,0 +1,4 @@
+from .ExternalCmdInvoker import ExternalCmdInvoker, run_cmd
+from .external_jvm_launcher import launch_egps_treeview, configure_egps, get_user_egps_configure
+
+__all__ = ["ExternalCmdInvoker", "run_cmd", "launch_egps_treeview", "configure_egps", "get_user_egps_configure"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -23,3 +23,5 @@
		from .utils import SVG_COLORS, COLOR_SCHEMES, random_color

		from .version import __version__

		from .py4egps_treeview import *