Source code for geochemistrypi.cli

# -*- coding: utf-8 -*-
import os

# import platform
import subprocess
import threading
from typing import Optional

import typer
from rich import print

from ._version import __version__
from .data_mining.cli_pipeline import cli_pipeline
from .data_mining.enum import DataSource

# Typer application object; the callback and commands defined in this module register on it.
app = typer.Typer()

# Directory that contains this module (symlinks resolved); the paths below are built from it.
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
# Directory of the web frontend, where `yarn start` is run.
FRONTEND_PATH = os.path.join(CURRENT_PATH, "frontend")
# Script executed with `python` to start the backend server.
BACKEND_PATH = os.path.join(CURRENT_PATH, "start_dash_pipeline.py")
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
PIPELINE_PATH = os.path.join(CURRENT_PATH, "start_cli_pipeline.py")


def _version_callback(value: bool) -> None:
    """Show Geochemistry Pi version."""
    if value:
        typer.echo(f"Geochemistry π {__version__}")
        raise typer.Exit()


@app.callback()
def main(
    version: Optional[bool] = typer.Option(
        None,
        "--version",
        "-v",
        help="Show version.",
        callback=_version_callback,
        is_eager=True,
    ),
) -> None:
    """
    Geochemistry π is an open-sourced highly automated machine learning Python framework for data-driven geochemistry discovery.
    It has the core components of continuous training, machine learning lifecycle management and model inference.
    """
    # NOTE: the docstring above doubles as the application's top-level help text,
    # so developer notes are kept in comments rather than in the docstring.
    #
    # `version` is never read here: the option is eager and handled entirely by
    # `_version_callback`, which prints the version and exits. The body is
    # therefore intentionally empty.
@app.command()
def data_mining(
    data: str = typer.Option("", help="The path of the training data without model inference."),
    desktop: bool = typer.Option(False, help="Use the data in the directory 'geopi_input' on the desktop for model training and model inference."),
    training: str = typer.Option("", help="The path of the training data."),
    application: str = typer.Option("", help="The path of the inference data."),
    mlflow: bool = typer.Option(False, help="Start the mlflow server."),
    # web: bool = False,
) -> None:
    """Implement the customized automated machine learning pipeline for geochemistry data mining."""
    # NOTE: the docstring above is used as the command's help text, so it is kept
    # to one line and developer notes live in comments.
    #
    # Option precedence (first match wins):
    #   1. --mlflow                    -> start the mlflow UI only
    #   2. --desktop                   -> pipeline on the desktop 'geopi_input' data
    #   3. --data                      -> continuous training on the given file
    #   4. --training + --application  -> continuous training and inference
    #   5. --training                  -> continuous training only
    #   6. nothing                     -> built-in data, training and inference

    def start_backend() -> None:
        """Start the backend server."""
        # Argument list (no shell) so a package path containing spaces stays a single argument.
        subprocess.run(["python", BACKEND_PATH])

    def start_frontend() -> None:
        """Start the frontend server."""
        # `cwd=` replaces `cd <path> && ...`, so the path is never parsed by the shell.
        # The shell is kept for the fixed command so that `yarn` resolves the same way as before.
        subprocess.run("yarn start", shell=True, cwd=FRONTEND_PATH)

    def start_mlflow() -> None:
        """Start the mlflow server.

        The tracking data is stored in a 'geopi_tracking' directory: the one in the
        current working directory if it exists, otherwise the one on the desktop,
        which is created when missing.
        """
        cur_working_dir = os.getcwd()
        geopi_tracking_dir = os.path.join(cur_working_dir, "geopi_tracking")
        if os.path.exists(geopi_tracking_dir):
            print(f"[bold green]The 'geopi_tracking' directory is found in the current working directory '{cur_working_dir}'.[/bold green]")
            print("[bold green]Our software will use the 'geopi_tracking' directory in the current working directory to store the tracking data for mlflow.[/bold green]")
        else:
            print(f"[bold red]The 'geopi_tracking' directory is not found in the current working directory '{cur_working_dir}'.[/bold red]")
            # Fall back to the desktop of the current user.
            geopi_tracking_dir = os.path.join(os.path.expanduser("~"), "Desktop", "geopi_tracking")
            if os.path.exists(geopi_tracking_dir):
                print("[bold green]The 'geopi_tracking' directory is found on the desktop.[/bold green]")
                print("[bold green]Our software will use the 'geopi_tracking' directory on the desktop to store the tracking data for mlflow.[/bold green]")
            else:
                print("[bold red]The 'geopi_tracking' directory is not found on the desktop.[/bold red]")
                print("[bold green]Creating the 'geopi_tracking' directory ...[/bold green]")
                # Actually create the directory the messages announce (parents included).
                os.makedirs(geopi_tracking_dir, exist_ok=True)
                print("[bold green]Successfully create 'geopi_tracking' directory on the desktop to store the tracking data for mlflow.[/bold green]")

        mlflow_store_uri = "file:///" + geopi_tracking_dir
        print("[bold green]Press [bold magenta]Ctrl + C[/bold magenta] to close mlflow server at any time.[/bold green]")
        # Argument list (no shell) so a tracking path containing spaces is passed as one argument.
        subprocess.run(["mlflow", "ui", "--backend-store-uri", mlflow_store_uri])

    # TODO: Currently, the web application is not fully implemented. It is disabled by default.
    web = False

    if web:
        # Start the backend and frontend in parallel
        backend_thread = threading.Thread(target=start_backend)
        backend_thread.start()
        frontend_thread = threading.Thread(target=start_frontend)
        frontend_thread.start()
        # Wait for the threads to finish
        backend_thread.join()
        frontend_thread.join()
    elif mlflow:
        # If mlflow is enabled, start the mlflow server instead of the CLI pipeline
        mlflow_thread = threading.Thread(target=start_mlflow)
        mlflow_thread.start()
    elif desktop:
        # Start the CLI pipeline with the data in the directory 'geopi_input' on the desktop
        # - Both continuous training and model inference
        # - Continuous training only
        cli_pipeline(training_data_path="", application_data_path="", data_source=DataSource.DESKTOP)
    elif data:
        # If the data is provided, start the CLI pipeline with continuous training
        cli_pipeline(training_data_path=data, application_data_path="", data_source=DataSource.ANY_PATH)
    elif training and application:
        # If the training data and inference data are provided, start the CLI pipeline with continuous training and inference
        cli_pipeline(training_data_path=training, application_data_path=application, data_source=DataSource.ANY_PATH)
    elif training:
        # If only the training data is provided, start the CLI pipeline with continuous training
        cli_pipeline(training_data_path=training, application_data_path="", data_source=DataSource.ANY_PATH)
    else:
        # If no data is provided, use built-in data to start the CLI pipeline with continuous training and inference
        cli_pipeline(training_data_path="", application_data_path="", data_source=DataSource.BUILT_IN)
# TODO: Currently, the web application is not fully implemented. It is disabled by default.
# @app.command()
# def web_setup() -> None:
#     """Set up the dependency of the web application."""
#     my_os = platform.system()
#     if my_os == "Windows":
#         # Define the command to download and install Yarn on Windows using Chocolatey package manager
#         download_yarn = "choco install yarn"
#         subprocess.run(download_yarn, shell=True)
#         # Define the command to download and install Node.js on Windows using Chocolatey package manager
#         download_node = "choco install nodejs"
#         subprocess.run(download_node, shell=True)
#     elif my_os == "Linux":
#         # Define the command to download and install Yarn on Linux using apt-get
#         download_yarn = "apt-get install -y yarn"
#         subprocess.run(download_yarn, shell=True)
#         # Define the command to download and install Node.js on Linux using apt-get
#         download_node = "apt-get install -y nodejs"
#         subprocess.run(download_node, shell=True)
#     elif my_os == "Darwin":
#         try:
#             check_node = "node --version"
#             subprocess.run(check_node, shell=True)
#             print("Node.js is already installed.")
#         except subprocess.CalledProcessError:
#             # Define the command to download and install Node.js on macOS using Homebrew
#             download_node = "brew install node"
#             subprocess.run(download_node, shell=True)
#         try:
#             # Define the command to check if Yarn is installed
#             check_yarn = "yarn --version"
#             subprocess.run(check_yarn, shell=True)
#             print("Yarn is already installed.")
#         except subprocess.CalledProcessError:
#             # Define the command to download and install Yarn on macOS using Homebrew
#             download_yarn = "brew install yarn"
#             subprocess.run(download_yarn, shell=True)
#     # Define the command to install the frontend dependencies
#     install_frontend_dependency_cmd = f"cd {FRONTEND_PATH} && yarn install"
#     subprocess.run(install_frontend_dependency_cmd, shell=True)