Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion R/pkg/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,11 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
error = function(e) { FALSE })) {
obj <- get(nodeChar, envir = func.env, inherits = FALSE)
if (is.function(obj)) {
if (is.primitive(obj)) {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SparkR is deprecated as of Spark 4.0, so this PR applies the change only to branch-3.5.
Do we need to apply this change to other branches as well, including master?
CRAN seems to provide only the latest version of R, so it is difficult to pin to an older version; this change is therefore necessary at least on branch-3.5 to keep CI working.

# Primitive functions have no closure to clean.
assign(nodeChar, obj, envir = newEnv)
break
}
# If the node is a function call.
funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F,
ifnotfound = list(list(NULL)))[[1]]
Expand Down Expand Up @@ -592,7 +597,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
# return value
# a new version of func that has a correct environment (closure).
cleanClosure <- function(func, checkedFuncs = new.env()) {
if (is.function(func)) {
if (is.function(func) && !is.primitive(func)) {
newEnv <- new.env(parent = .GlobalEnv)
func.body <- body(func)
oldEnv <- environment(func)
Expand Down
27 changes: 13 additions & 14 deletions dev/infra/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,39 @@
# See also in https://hub.docker.com/_/ubuntu
FROM ubuntu:focal-20221019

ENV FULL_REFRESH_DATE 20221118
ENV FULL_REFRESH_DATE 20260510

ENV DEBIAN_FRONTEND noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN true

ARG APT_INSTALL="apt-get install --no-install-recommends -y"

RUN apt-get clean
RUN apt-get update
RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget openjdk-8-jdk libpython3-dev python3-pip python3-setuptools python3.8 python3.9
RUN apt-get update && $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget openjdk-8-jdk libpython3-dev python3-pip python3-setuptools python3.8 python3.9
RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
RUN curl -sS https://bootstrap.pypa.io/pip/3.9/get-pip.py | python3.9

RUN add-apt-repository ppa:pypy/ppa
RUN apt update
RUN $APT_INSTALL gfortran libopenblas-dev liblapack-dev
RUN $APT_INSTALL build-essential
RUN apt-get update && $APT_INSTALL gfortran libopenblas-dev liblapack-dev build-essential

RUN mkdir -p /usr/local/pypy/pypy3.8 && \
curl -sqL https://downloads.python.org/pypy/pypy3.8-v7.3.11-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.8 --strip-components=1 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3

RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
RUN curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py | pypy3

RUN $APT_INSTALL gnupg ca-certificates pandoc
RUN apt-get update && $APT_INSTALL gnupg ca-certificates pandoc
RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list
RUN gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9
RUN gpg -a --export E084DAB9 | apt-key add -
RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/'
RUN apt update
RUN $APT_INSTALL r-base libcurl4-openssl-dev qpdf libssl-dev zlib1g-dev
RUN apt-get update && $APT_INSTALL r-base libcurl4-openssl-dev qpdf libssl-dev zlib1g-dev libuv1-dev
RUN Rscript -e "install.packages(c('remotes', 'knitr', 'markdown', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'roxygen2', 'xml2'), repos='https://cloud.r-project.org/')"

# See more in SPARK-39959, roxygen2 < 7.2.1
RUN apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
RUN apt-get update && apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \
libtiff5-dev libjpeg-dev
RUN Rscript -e "install.packages(c('remotes'), repos='https://cloud.r-project.org/')"
Expand All @@ -64,8 +60,11 @@ RUN Rscript -e "remotes::install_version('roxygen2', version='7.2.0', repos='htt
# See more in SPARK-39735
ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"

RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib
RUN python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
RUN printf 'beniget==0.4.1\npyproject-metadata==0.8.1\n' > /tmp/pypy-constraints.txt && \
PIP_CONSTRAINT=/tmp/pypy-constraints.txt pypy3 -m pip install numpy scipy coverage matplotlib && \
SETUPTOOLS_USE_DISTUTILS=stdlib pypy3 -m pip install 'pandas<=2.0.3' && \
rm /tmp/pypy-constraints.txt
RUN python3.9 -m pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'

# Add Python deps for Spark Connect.
RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4'
Expand Down
6 changes: 6 additions & 0 deletions python/mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,9 @@ ignore_missing_imports = True
; Ignore errors for proto generated code
[mypy-pyspark.sql.connect.proto.*, pyspark.sql.connect.proto]
ignore_errors = True

[mypy-pydantic.*]
follow_imports = skip

[mypy-sqlalchemy.*]
follow_imports = skip