#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Abort as soon as any command fails.
set -e
# With nullglob a glob that matches nothing expands to an empty list rather than
# the literal pattern; this script checks for files by counting glob matches.
shopt -s nullglob

# FWDIR is the parent of the directory that contains this script.
# `&&` (not `;`) is required: command substitutions do not inherit `set -e`, so
# with `;` a failed `cd` would be masked by the successful `pwd` and FWDIR would
# silently become the caller's current directory.
FWDIR="$(cd "$(dirname "$0")"/.. && pwd)"
cd "$FWDIR"

# GitHub Actions machines do not have enough disk space (see SPARK-44557), so
# remove ignored/untracked files that the pip packaging test does not need.
# The coverage and test reports from previous tests are excluded from the
# cleanup because coverage runs still need them.
if [[ -n "${GITHUB_ACTIONS}" ]]; then
  git clean -d -f -x \
    -e assembly \
    -e python/coverage.xml \
    -e target/test-reports/
fi

# The virtualenv used for testing is created under a fresh temporary directory.
printf '%s\n' "Constructing virtual env for testing"
VIRTUALENV_BASE="$(mktemp -d)"

# Remove the temporary directory that holds the test virtualenv.
# Globals: VIRTUALENV_BASE (read) - directory to delete.
# Outputs: one progress line on stdout.
delete_virtualenv() {
  printf 'Cleaning up temporary directory - %s\n' "$VIRTUALENV_BASE"
  rm -rf "$VIRTUALENV_BASE"
}
# Run the cleanup whenever the script exits.
trap 'delete_virtualenv' EXIT


# Interpreter under test: PYTHON_TO_TEST when it is set and non-empty,
# otherwise python3.
PYTHON_EXECUTABLE="${PYTHON_TO_TEST:-python3}"

# Skip the whole test run (successfully) when the interpreter is unavailable.
# `command -v` is used instead of `hash` because bash's `hash` skips any name
# that contains a slash and reports success, so a PYTHON_TO_TEST path pointing
# at a missing interpreter would pass the check and only fail later.
if ! command -v "$PYTHON_EXECUTABLE" >/dev/null 2>&1; then
  echo "Python executable $PYTHON_EXECUTABLE not installed on system, skipping pip installability tests"
  exit 0
fi

echo "Using Python executable: $PYTHON_EXECUTABLE"

# Options shared by every pip install command below.
PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall --use-pep517"

# Read the PySpark version from version.py; it is part of the sdist archive name.
PYSPARK_VERSION="$($PYTHON_EXECUTABLE -c "exec(open('python/pyspark/version.py').read());print(__version__)")"
PYSPARK_DIST="${FWDIR}/python/dist/pyspark-${PYSPARK_VERSION}.tar.gz"

# Install commands to exercise: a regular user install from the sdist, then an
# editable (dev) install.
PIP_COMMANDS=()
PIP_COMMANDS+=("pip install ${PIP_OPTIONS} ${PYSPARK_DIST}")
PIP_COMMANDS+=("pip install ${PIP_OPTIONS} -e python/packaging/classic")

# Jenkins has PySpark installed in the shared user site-packages. Exclude user
# site-packages explicitly so that installation cannot affect this test.
PYTHONNOUSERSITE=1
export PYTHONNOUSERSITE

# For every install mode in PIP_COMMANDS: build a fresh virtualenv, create the
# sdist, install PySpark with that pip command, and run the sanity checks.
for install_command in "${PIP_COMMANDS[@]}"; do
  echo "Using $VIRTUALENV_BASE for virtualenv"
  # The virtualenv lives in a fixed subdirectory of the temporary base, so the
  # `rm -rf` below never removes the base directory itself. `:?` aborts rather
  # than targeting "/venv" if the base is ever unset or empty.
  VIRTUALENV_PATH="${VIRTUALENV_BASE:?VIRTUALENV_BASE must be set}/venv"
  rm -rf "$VIRTUALENV_PATH"
  $PYTHON_EXECUTABLE -m venv "$VIRTUALENV_PATH"
  source "$VIRTUALENV_PATH"/bin/activate
  pip install --upgrade pip wheel numpy setuptools

  echo "Creating pip installable source dist"
  cd "$FWDIR"/python
  # Delete the egg info file if it exists, this can cache the setup file.
  rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
  python3 packaging/classic/setup.py sdist

  echo "Installing dist into virtual env"
  cd dist
  # Verify that the dist directory only contains one thing to install.
  # This count relies on nullglob: without it an unmatched pattern would stay
  # in the array as a literal and be counted as one entry.
  sdists=(*.tar.gz)
  if (( ${#sdists[@]} != 1 )); then
    echo "Unexpected number of targets found in dist directory - please cleanup existing sdists first." >&2
    # Exit statuses are limited to 0-255, so use a plain failure code.
    exit 1
  fi

  # Do the actual installation. The command is stored as a single string and is
  # split on whitespace on purpose; `read -a` does that split without the
  # pathname expansion an unquoted expansion would also apply.
  cd "$FWDIR"
  read -r -a install_args <<< "$install_command"
  "${install_args[@]}"

  # NOTE(review): presumably the checks run from / so that they use the
  # pip-installed package rather than the source tree - confirm.
  cd /

  echo "Run basic sanity check on pip installed version with spark-submit"
  spark-submit "$FWDIR"/dev/pip-sanity-check.py
  echo "Run basic sanity check with import based"
  python3 "$FWDIR"/dev/pip-sanity-check.py
  echo "Run the tests for context.py"
  python3 "$FWDIR"/python/pyspark/core/context.py

  cd "$FWDIR"

  # Leave the virtualenv before the next iteration builds a new one.
  deactivate

done

exit 0
