
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

# Numeric uid:gid the runtime container runs as; chosen arbitrarily in the hope
# that it does not collide with ids already in use elsewhere.
# It must be declared globally (before the first FROM) and then re-declared
# with a bare `ARG UID_GID` in every stage that uses it -- see TIKA-3912.
ARG UID_GID="35002:35002"

# Common Ubuntu base image shared by the download and runtime stages.
FROM ubuntu:resolute AS base

# Build-only stage: downloads the tika-server distribution, optionally checks
# its GPG signature, and unpacks it to /opt/tika-server. Only that directory
# is copied into the final image; everything else in this stage is discarded.
FROM base AS fetch_tika

# TIKA_VERSION: required, the released version to download (no default).
# CHECK_SIG:    "true" (default) verifies the archive against the Apache Tika
#               KEYS file; any other value skips signature verification.
ARG TIKA_VERSION
ARG CHECK_SIG=true

ENV TIKA_SERVER_ARCHIVE="tika-server-standard-${TIKA_VERSION}.zip" \
    NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip" \
    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip" \
    BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip" \
    DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc" \
    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.zip.asc" \
    TIKA_VERSION=$TIKA_VERSION

# 4.x publishes tika-server as a .zip distribution. The thin top-level
# tika-server-standard-<version>.jar uses its manifest Class-Path to resolve the
# jars under lib/, and tika-server reads pf4j plugins from the plugins/
# directory next to it.
#
# The script below is `;`-separated so that `set -e` aborts the build on the
# first failing step; `||` is used only to fall back from one mirror to the next.
#   1. fail fast when TIKA_VERSION was not supplied (the URLs would be malformed);
#   2. install the download/verification tools;
#   3. fetch the archive: dlcdn first, then archive.apache.org, then
#      downloads.apache.org. `fetch DEST WGET_ARGS...` removes a partial DEST
#      when wget fails so a broken download is never mistaken for a good one;
#   4. when CHECK_SIG is "true": download KEYS to a file (rather than piping it
#      into gpg, which would hide a wget failure), import it, fetch the
#      detached signature and verify the archive against it;
#   5. unpack to /opt/tika-server and remove the downloaded files.
RUN set -eux; \
    : "${TIKA_VERSION:?the TIKA_VERSION build-arg is required}"; \
    apt-get update; \
    DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
        ca-certificates \
        gnupg2 \
        unzip \
        wget; \
    archive="/${TIKA_SERVER_ARCHIVE}"; \
    fetch() { dest="$1"; shift; wget "$@" -O "${dest}" || { rm -f "${dest}"; return 1; }; }; \
    fetch "${archive}" -t 10 --max-redirect 1 --retry-connrefused "${NEAREST_TIKA_SERVER_URL}" \
        || fetch "${archive}" "${ARCHIVE_TIKA_SERVER_URL}" \
        || fetch "${archive}" "${BACKUP_TIKA_SERVER_URL}"; \
    if [ "${CHECK_SIG}" = "true" ]; then \
        fetch /tmp/tika-KEYS -t 10 --max-redirect 1 --retry-connrefused -q https://downloads.apache.org/tika/KEYS; \
        gpg --import /tmp/tika-KEYS; \
        fetch "${archive}.asc" -t 10 --max-redirect 1 --retry-connrefused "${DEFAULT_TIKA_SERVER_ASC_URL}" \
            || fetch "${archive}.asc" "${ARCHIVE_TIKA_SERVER_ASC_URL}"; \
        gpg --verify "${archive}.asc" "${archive}"; \
    fi; \
    mkdir -p /opt/tika-server; \
    unzip -q "${archive}" -d /opt/tika-server; \
    rm -f "${archive}" "${archive}.asc" /tmp/tika-KEYS

# Final image: a headless JRE plus the unpacked tika-server distribution,
# started as the unprivileged UID_GID user.
FROM base AS runtime
# Re-declare the global UID_GID so it is visible inside this stage.
ARG UID_GID
# Name of the apt package that provides the Java runtime.
ARG JRE='openjdk-25-jre-headless'
# DEBIAN_FRONTEND is set inline (not via ENV) so it never leaks into the
# runtime environment; apt caches and temp files are removed in the same layer.
RUN set -eux \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
        ${JRE} \
        ca-certificates \
    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Declared after the package layer so changing the version does not
# invalidate the cached JRE install.
ARG TIKA_VERSION
ENV TIKA_VERSION=$TIKA_VERSION
COPY --from=fetch_tika /opt/tika-server /opt/tika-server
# WORKDIR sets the CWD so tika-server's plugin-root fallback resolves
# `plugins/` relative to /opt/tika-server (its `getCodeSource()` returns a
# lib/* path, not the top-level jar, so the "next-to-jar" resolution misses).
WORKDIR /opt/tika-server
USER $UID_GID
EXPOSE 9998
# Classpath includes the thin server jar, its lib/ deps, and any user-mounted /tika-extras/.
# /opt/tika-server/* matches the single versioned tika-server-standard-<version>.jar
# at that level (Java's classpath wildcard only expands *.jar), so the entrypoint
# needs no edit when the version changes.
# tika-server auto-discovers pf4j plugins from /opt/tika-server/plugins/.
#
# `exec` replaces the shell so java receives container signals directly.
# With `sh -c <script> arg0 arg1 ...` the first CMD / `docker run` argument
# lands in $0 and the remaining ones in $@, so both are forwarded. They are
# quoted so arguments containing spaces or glob characters reach
# TikaServerCli unchanged instead of being re-split by the shell. When no
# arguments are supplied, $0 is the shell's own name and is forwarded as before.
ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/opt/tika-server/*:/opt/tika-server/lib/*:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 \"$0\" \"$@\""]

LABEL maintainer="Apache Tika Developers dev@tika.apache.org"
