diff --git a/.gitignore b/.gitignore index ef920b75f..8cbf8708e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,9 @@ test.svg # MacOSX .DS_Store + + +# IDEs and editors +.idea/ + +user_individual_proj/ \ No newline at end of file diff --git a/ete4/__init__.py b/ete4/__init__.py index f614e627e..7ec098182 100644 --- a/ete4/__init__.py +++ b/ete4/__init__.py @@ -23,3 +23,5 @@ from .utils import SVG_COLORS, COLOR_SCHEMES, random_color from .version import __version__ + +from .py4egps_treeview import * \ No newline at end of file diff --git a/ete4/config.py b/ete4/config.py index 4de6cb991..22626a1c0 100644 --- a/ete4/config.py +++ b/ete4/config.py @@ -1,54 +1,92 @@ """ -Constants with the XDG-compliant directories for ete. +ETE Toolkit XDG-compliant directory constants configuration file. + +This file defines the standard paths for ETE to store data, configuration, and cache on different operating systems. +It follows the XDG Base Directory specification to ensure compatibility across different systems. """ # See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html +# The XDG Base Directory specification defines which standard directories applications should use +# to store different types of data import os +# Import dirname and exists functions from os.path module for path handling and file existence checking from os.path import dirname, exists +# Import requests library for HTTP requests to download files import requests -# Helper function to define global ETE_* variables. 
+# Helper function: used to define global ETE_* variables +# Parameters: +# xdg_var: XDG environment variable name (e.g., XDG_DATA_HOME) +# default: Default path (relative to user home directory) when environment variable is not set def ete_path(xdg_var, default): - return os.environ.get(xdg_var, os.environ['HOME'] + default) + '/ete' + """ + Determine ETE-related directory paths according to XDG standard + + First check if the corresponding XDG environment variable is set, if so use its value, + otherwise use the default path (user home directory + default subdirectory). + Finally append '/ete' to that path; only the string is built, no directory is created here. + """ + # Get user home directory and concatenate default path as prefix + prefix = os.path.expanduser('~') + default + # Use environment variable value if set, otherwise use default prefix, then add '/ete' subdirectory + return os.environ.get(xdg_var, prefix) + '/ete' +# Define ETE data storage directory (for database and other data files) ETE_DATA_HOME = ete_path('XDG_DATA_HOME', '/.local/share') +# Define ETE configuration file directory (for configuration files) ETE_CONFIG_HOME = ete_path('XDG_CONFIG_HOME', '/.config') +# Define ETE cache directory (for temporary files and cache) ETE_CACHE_HOME = ete_path('XDG_CACHE_HOME', '/.cache') def update_ete_data(path, url, overwrite=False): - """Refresh the contents of path with the ones in the given in the url.""" - # Resolve relative paths to refer to ETE_DATA_HOME. + """ + Download data from specified URL and update local file + + Parameters: + path: Local file path (relative or absolute path) + url: Data source URL (relative or absolute URL) + overwrite: Whether to overwrite existing files, default is False (no overwrite) + """ + # Handle relative paths: if path is not an absolute path (doesn't start with '/'), + # resolve it as a relative path under ETE_DATA_HOME if not path.startswith('/'): path = ETE_DATA_HOME + '/' + path - # Keep existing file if we asked for it. 
+ # If file already exists and not forcing overwrite, return directly without updating if not overwrite and exists(path): return - # Create the directory. + # Create directory: if path contains directory and directory doesn't exist, create the directory + # dirname(path) gets the directory part of the path, exists(dirname(path)) checks if directory exists if dirname(path) and not exists(dirname(path)): + # Use system command to create directory (mkdir -p can recursively create multi-level directories); NOTE: the path reaches the shell unquoted os.system('mkdir -p ' + dirname(path)) - # Resolve relative urls to refer to ete-data repository. + # Handle relative URL: if URL is not an absolute URL (doesn't start with 'https://'), + # resolve it as a relative URL to the ete-data repository if not url.startswith('https://'): - url = 'https://github.com/etetoolkit/ete-data/raw/main/' + url + url = 'https://github.com/etetoolkit/ete-data/raw/refs/heads/main/' + url - # Update local file with the content from the url. - with open(path, 'wb') as f: - print(f'{url} -> {path}') - f.write(requests.get(url).content) - # NOTE: If we had wget, this is similar to: + # Download content from URL and write to local file (NOTE: the file is opened, i.e. truncated, before the request is made) + with open(path, 'wb') as f: # Open file in binary write mode + print(f'{url} -> {path}') # Print download information, showing which URL downloads to which path + f.write(requests.get(url).content) # Send HTTP GET request and write the body to file (the response status is not checked) + # Note: If we had wget command, we could achieve similar functionality, and wget supports resume download + # which is more advantageous for large file downloads # os.system(f'wget -c -nv -O {path} {url}') - # only wget could be better since it resumes partially downloaded files. 
+ # The advantage of wget is that it can resume partially downloaded files -# Example: +# Usage example: # +# # Define the file name to download # path = 'gtdb202dump.tar.gz' -# url = ('https://github.com/etetoolkit/ete-data/raw/main' +# # Define the complete URL of the file on GitHub +# url = ('https://github.com/etetoolkit/ete-data/raw/refs/heads/main' # '/gtdb_taxonomy/gtdb202/gtdb202dump.tar.gz') # -# update_ete_data(path, url) +# # Call function to update data +# update_ete_data(path, url) \ No newline at end of file diff --git a/ete4/core/tree.pyx b/ete4/core/tree.pyx index 85aecb6e7..d0c8598b8 100644 --- a/ete4/core/tree.pyx +++ b/ete4/core/tree.pyx @@ -707,7 +707,7 @@ cdef class Tree: """Return or write to file the newick representation. :param str outfile: Name of the output file. If present, it will write - the newick to that file instad of returning it as a string. + the newick to that file instead of returning it as a string. :param list props: Properties to write for all nodes using the Extended Newick Format. If None, write all available properties. :param parser: Parser used to encode the tree in newick format. diff --git a/ete4/py4egps_treeview/ExternalCmdInvoker.py b/ete4/py4egps_treeview/ExternalCmdInvoker.py new file mode 100644 index 000000000..12fd1dec8 --- /dev/null +++ b/ete4/py4egps_treeview/ExternalCmdInvoker.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" +ExternalCmdInvoker +~~~~~~~~~~~~~~~~~~ +Cross-platform external command execution helper. +This module provides functionality to execute external commands in a cross-platform manner, +handling differences between Windows and Unix-like systems appropriately. 
+ +Example usage: + >>> from ete4.py4egps_treeview import ExternalCmdInvoker + >>> invoker = ExternalCmdInvoker() + >>> out, err, code = invoker.run_cmd(["java", "-jar", "MyTool.jar", "--help"]) +""" +import platform +import subprocess +import shlex +from typing import List, Tuple, Optional, Dict + + +class ExternalCmdInvoker: + """ + A class to invoke external commands in a cross-platform manner. + + This class provides functionality to execute external commands with comprehensive + control over execution parameters, handling differences between Windows and + Unix-like systems appropriately. + + The class provides two main methods: + 1. run_cmd() - For executing standalone executable files (e.g., java, python) + 2. run_shell_cmd() - For executing shell commands including shell builtins + (e.g., dir on Windows cmd, cd or export on Unix shells) + """ + + def _quote(self, args: List[str]) -> str: + """ + Convert argument list to a command line string suitable for the current OS. + This method is primarily used for logging/printing purposes to generate + a copy-pasteable command line string that would execute the same command. + + Args: + args: List of command arguments + + Returns: + A properly quoted command line string for the current operating system + """ + system = platform.system() + if system == "Windows": + # Using the official list2cmdline which properly handles spaces and quotes + from subprocess import list2cmdline + return list2cmdline(args) + else: # Linux / Darwin (macOS) + # shlex.join needs Python 3.8+, but this package declares requires-python >= 3.7 + return " ".join(shlex.quote(arg) for arg in args) + + def run_cmd( + self, + argv: List[str], + cwd: Optional[str] = None, + env: Optional[Dict[str, str]] = None, + check: bool = False, + capture_output: bool = True, + text: bool = True, + ) -> Tuple[str, str, int]: + """ + Execute an external command with comprehensive control over execution parameters. 
+ + This method provides a convenient wrapper around subprocess.run with sensible + defaults for common use cases and proper cross-platform argument handling. + + NOTE: This method executes programs directly and cannot run shell builtin + commands like 'dir' on Windows or 'cd' on Unix. For shell builtin commands, + use run_shell_cmd() instead. + + Args: + argv : List[str] + Command and its arguments as a list, e.g. ["java", "-jar", "/path/to/app.jar", "--flag", "value"] + Note: This should not be a shell builtin command. + cwd : str | None + Working directory for the command execution; if None, inherits the parent process directory + env : dict | None + Complete environment for the child (it replaces, not extends, the parent's); if None, inherits the parent process environment + check : bool + If True, raises a CalledProcessError when the command returns a non-zero exit code + capture_output : bool + If True, captures and returns stdout and stderr; if False, they are not captured + text : bool + If True, returns output as strings; if False, returns output as bytes + + Returns: + A tuple containing (stdout, stderr, return_code); stdout and stderr are None when capture_output is False + + Example: + >>> invoker = ExternalCmdInvoker() + >>> out, err, code = invoker.run_cmd(["python", "--version"]) + >>> print(f"Command exited with code: {code}") + + Note: + This method will NOT work for: + - Windows cmd builtin commands like 'dir', 'cd' + - PowerShell commands like 'ls' (which is an alias for Get-ChildItem) + - Unix shell builtin commands like 'cd', 'export' + """ + cmd_str = self._quote(argv) + print(f"[RUN] {cmd_str}") # Log the copy-pasteable full command line + + completed = subprocess.run( + argv, + cwd=cwd, + env=env, + check=check, + capture_output=capture_output, + text=text, + ) + return completed.stdout, completed.stderr, completed.returncode + + def run_shell_cmd( + self, + cmd: str, + cwd: Optional[str] = None, + env: Optional[Dict[str, str]] = None, + check: bool = False, + capture_output: bool = True, + text: bool = True, + ) -> Tuple[str, str, int]: + 
""" + Execute a shell command with comprehensive control over execution parameters. + + This method provides a convenient wrapper around subprocess.run with shell=True, + allowing execution of shell builtins and shell syntax such as 'dir' on Windows or 'ls | wc -l' on Unix. + Use this method when you need to execute shell builtin commands or when you want + to execute commands through the system shell. Never build cmd from untrusted input. + + Args: + cmd : str + The command string to execute through the shell, e.g. "dir" or "ls -l" + cwd : str | None + Working directory for the command execution; if None, inherits the parent process directory + env : dict | None + Complete environment for the child (it replaces, not extends, the parent's); if None, inherits the parent process environment + check : bool + If True, raises a CalledProcessError when the command returns a non-zero exit code + capture_output : bool + If True, captures and returns stdout and stderr; if False, they are not captured + text : bool + If True, returns output as strings; if False, returns output as bytes + + Returns: + A tuple containing (stdout, stderr, return_code); stdout and stderr are None when capture_output is False + + Example: + >>> invoker = ExternalCmdInvoker() + >>> # On Windows (cmd builtins) + >>> out, err, code = invoker.run_shell_cmd("dir") + >>> + >>> # On Windows shell=True runs %COMSPEC% (cmd.exe), so PowerShell must be started explicitly + >>> out, err, code = invoker.run_shell_cmd('powershell -Command "ls"') + >>> + >>> # On Unix/Linux/macOS + >>> out, err, code = invoker.run_shell_cmd("ls -l") + >>> print(f"Command exited with code: {code}") + + Note: + This method works for: + - Windows cmd builtin commands like 'dir', 'cd' + - (PowerShell cmdlets and aliases are NOT available: Windows uses cmd.exe via %COMSPEC%) + - Unix shell builtin commands like 'cd', 'export' + - All commands that work when typed directly in the system shell + """ + print(f"[RUN] {cmd}") # Log the command + + completed = subprocess.run( + cmd, + shell=True, + cwd=cwd, + env=env, + check=check, + capture_output=capture_output, + text=text, + ) + return completed.stdout, completed.stderr, completed.returncode + + +# For backward 
compatibility, expose the old function-based interface +def run_cmd( + argv: List[str], + cwd: Optional[str] = None, + env: Optional[Dict[str, str]] = None, + check: bool = False, + capture_output: bool = True, + text: bool = True, +) -> Tuple[str, str, int]: + """ + Backward compatibility function. + + Execute an external command with comprehensive control over execution parameters. + This is a wrapper around ExternalCmdInvoker.run_cmd() for backward compatibility. + Note: This method cannot execute shell builtin commands like 'dir' on Windows. + For shell builtin commands, use ExternalCmdInvoker.run_shell_cmd() instead. + """ + invoker = ExternalCmdInvoker() + return invoker.run_cmd(argv, cwd, env, check, capture_output, text) + + +# —————— Simple Example —————— +if __name__ == "__main__": + # Example: java -jar demo.jar --input sample.txt + # An argv list goes to run_cmd; run_shell_cmd expects one command string for the shell + invoker = ExternalCmdInvoker() + out, err, code = invoker.run_cmd( + ["java", "-jar", "demo.jar", "--input", "sample.txt"], check=False + ) + print("exit code:", code) + if out: + print("stdout:\n", out) + if err: + print("stderr:\n", err) diff --git a/ete4/py4egps_treeview/__init__.py b/ete4/py4egps_treeview/__init__.py new file mode 100644 index 000000000..09cc41dbc --- /dev/null +++ b/ete4/py4egps_treeview/__init__.py @@ -0,0 +1,4 @@ +from .ExternalCmdInvoker import ExternalCmdInvoker, run_cmd +from .external_jvm_launcher import launch_egps_treeview,configure_egps,get_user_egps_configure + +__all__ = ["ExternalCmdInvoker", "run_cmd", "launch_egps_treeview","configure_egps", "get_user_egps_configure"] \ No newline at end of file diff --git a/ete4/py4egps_treeview/external_jvm_launcher.py b/ete4/py4egps_treeview/external_jvm_launcher.py new file mode 100644 index 000000000..a19677155 --- /dev/null +++ b/ete4/py4egps_treeview/external_jvm_launcher.py @@ -0,0 +1,131 @@ +import platform +import subprocess +import json +from pathlib import Path +from typing import Sequence, 
Optional, List +from ete4 import Tree + +""" +The JVM launcher configuration: +{config_egps_file} +The path is same as Py4eGPS package, so, user do not need to configurator again. +""" + +config_file_default_template = """ +# The blank area of top,left,bottom,right . +$blank.space=20,20,80,80 +# Whether display the leaf label on the tree. +$show.leaf.label=T +""".splitlines() + +config_egps_file = Path.home() / '.eGPS4Py' / 'egps_config.json' + + + +def configure_egps(egps_software_path: str): + """ + Record the eGPS install directory and its bundled JVM path in the per-user file ~/.eGPS4Py/egps_config.json. + + Args: + egps_software_path (str): Root directory of the eGPS installation; the JVM is taken to be <root>/jre/bin/java (existence is not checked) + """ + path_installed = Path(egps_software_path) + path_java_bin = path_installed / "jre" / "bin" / "java" + data = {'egps_software_path': path_installed.resolve().as_posix(), 'java_bin': path_java_bin.resolve().as_posix()} + # Save configuration to user home directory to avoid permission issues + config_dir = Path.home() / '.eGPS4Py' + config_dir.mkdir(exist_ok=True) + with open(config_egps_file, 'w') as f: + json.dump(data, f) + + +def get_user_egps_configure(): + """ + Get user-specific eGPS configuration from home directory. + + Returns: + tuple: (egps_software_path, java_bin), the two values stored by configure_egps() + + Raises: + FileNotFoundError: If user configuration file does not exist + """ + # Read configuration from user directory + if config_egps_file.exists(): + with open(config_egps_file, 'r') as f: + loaded_data = json.load(f) + return loaded_data['egps_software_path'], loaded_data['java_bin'] + else: + # If user configuration does not exist, raise exception + raise FileNotFoundError("JVM configuration not found. 
Please call configure_egps() first.") +def launch_egps_treeview( + tree: Tree, + jvm_opts: Optional[Sequence[str]] = None, + log_file_name: str = "egps.log.file", + config_templete: Optional[Sequence[str]] = None, + extra_configs: Optional[Sequence[str]] = None, + +) -> subprocess.Popen: + """ + Start the eGPS Swing tree viewer in a JVM and return the Popen handle. + + Parameters + ---------- + tree: Tree + The ete4 tree instance; it is written as newick to temp.tree.nwk in the current directory. + jvm_opts : list[str] | None + Extra JVM flags, e.g. ["-Xmx2G", "-Dmy.prop=value"]. + log_file_name : str + File that the JVM's stdout and stderr are appended to. + config_templete : list[str] | None + Config lines written after the tree path; defaults to config_file_default_template. + extra_configs : list[str] | None + Config lines written before the tree path. + + Raises + ------ + FileNotFoundError + If configure_egps() has not been called yet. + """ + if not config_egps_file.exists(): + raise FileNotFoundError("Please configure eGPS software path first, use configure_egps()") + egps_installed_path,java_bin = get_user_egps_configure() + if config_templete: + config_file_template_suffix = config_templete + else: + config_file_template_suffix = config_file_default_template + + output_config_list = [] + if extra_configs: + # extend, not append: appending the whole sequence would make "\n".join() fail on a non-str item + output_config_list.extend(extra_configs) + + # The JVM class path separator is ';' on Windows and ':' everywhere else + class_path = "./eGPS_lib/*;." if platform.system() == "Windows" else "./eGPS_lib/*:." + cmd = [java_bin,"-cp", class_path,"@eGPS2.args","-splash:./laucher.gif","-Xss2m","-Xms7g","-Xmx8g"] + if jvm_opts: + cmd += list(jvm_opts) + + tmp_config_file = Path("temp.egps.modern.tree.view.txt") + path_of_config = tmp_config_file.resolve().as_posix() + + cmd += ["api.rpython.ModernTreeViewPyLauncher", path_of_config] + print(f"[RUN CMD:] {cmd}") + # Write the tree and the viewer config BEFORE starting the JVM so it can never read a missing or partial file + tmp_tree_file = Path("temp.tree.nwk") + tree.write(outfile=tmp_tree_file) + output_config_list.append(f"$input.nwk.path={tmp_tree_file.resolve().as_posix()}") + output_config_list.extend(config_file_template_suffix) + tmp_config_file.write_text("\n".join(output_config_list)) + + # The child gets its own copy of the log handle, so the parent's copy is closed once Popen returns + 
with open(log_file_name, "a") as log_file: + proc = subprocess.Popen( + cmd, + cwd = egps_installed_path, + stdout=log_file, + stderr=log_file, + text=True, + ) + print(f"[INFO] GUI launched (pid={proc.pid})") + return proc + diff --git a/examples/general/random_tree.png b/examples/general/random_tree.png deleted file mode 100644 index 69e643686..000000000 Binary files a/examples/general/random_tree.png and /dev/null differ diff --git a/pyproject.toml b/pyproject.toml index 95af9fa0c..afdf7e693 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,53 +1,53 @@ -[build-system] -requires = ["setuptools>=61.0", "cython"] -build-backend = "setuptools.build_meta" - -[project] -name = "ete4" -description = "A Python Environment for (phylogenetic) Tree Exploration" -version = "4.3.0" -authors = [ - { name = "Jaime Huerta-Cepas", email = "jhcepas@gmail.com" }, - { name = "Jordi Burguet-Castell", email = "jordi.burguet.castell@gmail.com" }, -] -readme = "README.md" -license = "GPL-3.0-or-later" -license-files = [ "LICENSE" ] -requires-python = ">=3.7" -classifiers = [ - "Programming Language :: Python :: 3", - "Development Status :: 6 - Mature", - "Environment :: Console", - "Environment :: X11 Applications :: Qt", - "Intended Audience :: Developers", - "Intended Audience :: Other Audience", - "Intended Audience :: Science/Research", - "Natural Language :: English", - "Operating System :: MacOS", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Scientific/Engineering :: Visualization", - "Topic :: Software Development :: Libraries :: Python Modules", -] -dependencies = [ "bottle", "brotli", "numpy", "scipy", "requests" ] - -[project.urls] -"Homepage" = "http://etetoolkit.org" - -[project.scripts] -ete4 = "ete4.tools.ete:main" - -[project.optional-dependencies] -treeview = [ "pyqt6" ] -render_sm = [ "selenium" ] -treediff = [ "lap" ] -test = [ "pytest>=6.0" ] -doc = [ "sphinx" ] - -[tool.setuptools] -ext-modules = [ - { name = "ete4.core.tree", sources = [ 
"ete4/core/tree.pyx" ] }, - { name = "ete4.core.operations", sources = [ "ete4/core/operations.pyx" ] }, - { name = "ete4.parser.newick", sources = [ "ete4/parser/newick.pyx" ] }, -] +[build-system] +requires = ["setuptools>=61.0", "cython"] +build-backend = "setuptools.build_meta" + +[project] +name = "ete4" +description = "A Python Environment for (phylogenetic) Tree Exploration" +version = "4.3.0" +authors = [ + { name = "Jaime Huerta-Cepas", email = "jhcepas@gmail.com" }, + { name = "Jordi Burguet-Castell", email = "jordi.burguet.castell@gmail.com" }, +] +readme = "README.md" +license = "GPL-3.0-or-later" +license-files = [ "LICENSE" ] +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "Development Status :: 6 - Mature", + "Environment :: Console", + "Environment :: X11 Applications :: Qt", + "Intended Audience :: Developers", + "Intended Audience :: Other Audience", + "Intended Audience :: Science/Research", + "Natural Language :: English", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Visualization", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ "bottle", "brotli", "numpy", "scipy", "requests" ] + +[project.urls] +"Homepage" = "http://etetoolkit.org" + +[project.scripts] +ete4 = "ete4.tools.ete:main" + +[project.optional-dependencies] +treeview = [ "pyqt6" ] +render_sm = [ "selenium" ] +treediff = [ "lap" ] +test = [ "pytest>=6.0" ] +doc = [ "sphinx" ] + +[tool.setuptools] +ext-modules = [ + { name = "ete4.core.tree", sources = [ "ete4/core/tree.pyx" ] }, + { name = "ete4.core.operations", sources = [ "ete4/core/operations.pyx" ] }, + { name = "ete4.parser.newick", sources = [ "ete4/parser/newick.pyx" ] }, +] diff --git a/setup.py b/setup.py index 20a4a2531..2a6f503cf 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,16 @@ 
from setuptools import setup, Extension from glob import glob -from os.path import isfile +from os.path import isfile, sep from Cython.Build import cythonize def make_extension(path): # to create cython extensions the way we want - name = path.replace('/', '.')[:-len('.pyx')] # / -> . and remove .pyx + name = path.replace(sep, '.')[:-len('.pyx')] # OS-specific path separator -> '.', then drop the trailing '.pyx' return Extension(name, [path], extra_compile_args=['-O3']) + setup( name='ete4', packages=['ete4',