# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

# Portfile preamble. The github PortGroup is what provides github.setup and
# the other github.* options used in this file; the java PortGroup provides
# the java.version / java.fallback options.
PortSystem          1.0
PortGroup           github  1.0
PortGroup           java    1.0

# Upstream: GitHub project stanfordnlp/CoreNLP, version 3.9.2, tag prefix "v".
# The port name is set explicitly after github.setup so that the port (and the
# ${name}-* subports declared at the end of this file) is called
# stanford-corenlp.
github.setup        stanfordnlp CoreNLP 3.9.2 v
name                stanford-corenlp
revision            0

# Classification: platform- and architecture-independent content.
categories          textproc java
platforms           any
license             GPL-3+
maintainers         nomaintainer
supported_archs     noarch

# Short and long descriptions shown to users. The trailing backslashes in
# long_description are Tcl line continuations.
description         A Java suite of core NLP tools
long_description    Stanford CoreNLP provides a set of natural language \
                    analysis tools written in Java. It can take raw \
                    human language text input and give the base forms \
                    of words, their parts of speech, whether they are \
                    names of companies, people, etc., normalize and \
                    interpret dates, times, and numeric quantities, \
                    mark up the structure of sentences in terms of \
                    phrases or word dependencies, and indicate which \
                    noun phrases refer to the same entities. It was \
                    originally developed for English, but now also \
                    provides varying levels of support for (Modern \
                    Standard) Arabic, (mainland) Chinese, French, \
                    German, and Spanish. Stanford CoreNLP is an \
                    integrated framework, which makes it very easy to \
                    apply a bunch of language analysis tools to a \
                    piece of text. Starting from plain text, you can \
                    run all the tools with just two lines of code. Its \
                    analyses provide the foundational building blocks \
                    for higher-level and domain-specific text \
                    understanding applications. Stanford CoreNLP is a \
                    set of stable and well-tested natural language \
                    processing tools, widely used by various groups in \
                    academia, industry, and government. The tools \
                    variously use rule-based, probabilistic machine \
                    learning, and deep learning components.

homepage            https://stanfordnlp.github.io/CoreNLP/

# Livecheck: the captured group is a run of digits and dots, i.e. a plain
# dotted version number.
github.livecheck.regex \
                    {([0-9.]+)}

# Distfile for the main port: a zip file taken from GitHub's "archive"
# download. The checksums below belong to that file; every model subport
# declares its own checksums in its subport block.
use_zip             yes
github.tarball_from \
                    archive

checksums           rmd160  97f6bbcc3cf598251c36dbd2fcd8d51b6a6ac85d \
                    sha256  cf46d0ee23410a5e2b0e43af33a4e7e42a958571f21479b3c0b305a13ce6c14a \
                    size    74785152

# Dependencies.
#
# Only the main port runs a build (its build.cmd is ant), so only it needs
# Apache Ant. Ant is used solely to drive that build -- the destroot phase
# installs neither build.xml nor anything that invokes ant -- so it is
# declared as a build dependency rather than a library dependency. This keeps
# ant from being pulled in when the port is installed from a prebuilt archive.
if {${subport} eq ${name}} {
    depends_build-append \
                    port:apache-ant
}
# Every port except ${name}-models itself gets ${name}-models as a run-time
# dependency.
if {${subport} ne "${name}-models"} {
    depends_run-append \
                    port:${name}-models
}

# There is no configure phase for this port.
use_configure       no

# Required java version
java.version        1.8+
# LTS JDK port to install if required java not found
java.fallback       openjdk11

# Install locations inside the destroot. dest_java receives the jar files
# (used by the main port and by every model subport); dest_doc receives the
# documentation installed by the main port.
set dest_java ${destroot}${prefix}/share/java/${name}
set dest_doc ${destroot}${prefix}/share/doc/${name}

# Build and install rules for the main port only. The model subports set up
# their own (empty) build and their own destroot in
# stanford-corenl-models-setup.
if { ${name} eq ${subport} } {
    # Run ant with an empty target and no extra arguments.
    build.cmd       ant
    build.target
    build.pre_args
    build.args

    # Bundle the "edu" tree under classes/ into ${name}-${version}.jar. The
    # jar is written one level up, i.e. into the top of ${worksrcpath}, where
    # the destroot glob below picks it up together with the other jars.
    # NOTE(review): assumes the ant build leaves the compiled classes in
    # ${worksrcpath}/classes/edu -- confirm against upstream's build.xml.
    post-build {
        system -W ${worksrcpath}/classes \
            "jar -cf ../${name}-${version}.jar edu"
    }

    # xinstall_recursive_dir srcbase destbase reldir
    #
    # Recursively copy the directory ${srcbase}/${reldir} to
    # ${destbase}/${reldir}: directories are created with "xinstall -d" and
    # regular files are installed with mode 0644. Does nothing when
    # ${srcbase}/${reldir} is not a directory.
    #
    #   srcbase   directory that contains reldir in the source tree
    #   destbase  directory under which reldir is recreated
    #   reldir    path of the directory to copy, relative to srcbase
    proc xinstall_recursive_dir {srcbase destbase reldir} {
        if {[file isdirectory ${srcbase}/${reldir}]} {
            xinstall -d \
                ${destbase}/${reldir}
            # Each traversed path begins with ${srcbase}, so the path relative
            # to it is obtained by dropping that many leading characters. This
            # is a literal prefix removal: unlike a sed substitution built
            # from ${srcbase}, it cannot be confused by regex metacharacters
            # or by the substitution delimiter occurring in the path.
            set srcbase_len [string length ${srcbase}]
            fs-traverse f_or_d ${srcbase}/${reldir} {
                set relative_path [string range ${f_or_d} ${srcbase_len} end]
                if { [file isfile ${f_or_d}] } {
                    xinstall -m 0644 \
                        ${f_or_d} \
                        ${destbase}${relative_path}
                } elseif { [file isdirectory ${f_or_d}] } {
                    xinstall -d \
                        ${destbase}${relative_path}
                }
            }
        }
    }

    destroot {
        # Create the target directories.
        xinstall -d \
            ${dest_java} \
            ${dest_java}/classes \
            ${dest_doc}
        # Top-level jars (including the one made in post-build) and
        # properties files.
        foreach f [glob ${worksrcpath}/*.{jar,properties}] {
            xinstall -m 0644 ${f} ${dest_java}
        }
        # Jars and properties files found anywhere below the library
        # directories are installed flat into ${dest_java}.
        foreach d { lib liblocal libsrc } {
            fs-traverse f ${worksrcpath}/${d} {
                if { [file isfile ${f}]
                     && ( [string match "*.jar" ${f}]
                         || [string match "*.properties" ${f}] ) } {
                    xinstall -m 0644 ${f} ${dest_java}
                }
            }
        }
        # Class and source trees keep their directory layout.
        xinstall_recursive_dir ${worksrcpath} ${dest_java} classes
        xinstall_recursive_dir ${worksrcpath} ${dest_java} src
        # install additional documents
        xinstall_recursive_dir ${worksrcpath} ${dest_doc} doc
        xinstall_recursive_dir ${worksrcpath} ${dest_doc} licenses
        xinstall_recursive_dir ${worksrcpath} ${dest_doc} scripts
        xinstall -m 0644 -W ${worksrcpath} \
            LICENSE.txt README.md RESOURCE-LICENSES \
            ${dest_doc}
    }
}

# Shared setup for the model subports (the base "models" jar and the
# individual language models).
#
# Takes no arguments; it reads the global name, subport and version. It makes
# every model subport other than ${name}-models depend on the main port at run
# time, points master_sites at the Maven directory for this version, selects
# the matching jar as the distfile, disables extraction and building, and
# installs the jar unchanged into ${dest_java}.
proc stanford-corenl-models-setup {} {
    global name subport version

    if {${subport} ne "${name}-models"} {
        depends_run-append port:stanford-corenlp
    }

    master_sites    https://repo.maven.apache.org/maven2/edu/stanford/nlp/${name}/${version}/

    # Part of the subport name that follows "${name}-": either "models" or a
    # language tag such as "arabic" or "english-kbp".
    set model_tag   [string range ${subport} \
                        [expr {[string length ${name}] + 1}] end]

    # The base jar is <name>-<version>-models; a language jar appends
    # "-<tag>" to that.
    set jar_stem    ${name}-${version}-models
    if {${model_tag} ne "models"} {
        append jar_stem -${model_tag}
    }
    distname        ${jar_stem}

    # The distfile is the jar itself: fetch it as <distname>.jar and extract
    # nothing.
    extract.suffix  .jar
    extract.only
    dist_subdir     ${subport}

    # Nothing to build.
    build {}

    destroot {
        # Copy the downloaded jar as-is into the shared java directory.
        xinstall -d ${dest_java}
        xinstall -m 0644 -W ${distpath} ${distname}.jar ${dest_java}
    }
}

# Base models jar; its distfile is ${name}-${version}-models.jar (see
# stanford-corenl-models-setup). The checksums are those of that jar.
subport ${name}-models {
    stanford-corenl-models-setup
    checksums       rmd160  94e4d75bdd0f496a765e681bd60e0f5a29b33872 \
                    sha256  5997eeebfd631f9eb779c1d4246ac5fcf09298ffbf6db5e00c0f1b80dc34ba10 \
                    size    362565193
}

# Arabic models; distfile ${name}-${version}-models-arabic.jar.
subport ${name}-arabic {
    stanford-corenl-models-setup
    checksums       rmd160  ecf19e0f6a4d8ffdad228cb843178b491bfd55f5 \
                    sha256  7d102f4586731a4b15e8f109f879286790e74e6bdc3eb2e4ab09e99fd7375ba1 \
                    size    65787818
}

# Chinese models; distfile ${name}-${version}-models-chinese.jar (about 1 GB,
# per the size below).
subport ${name}-chinese {
    stanford-corenl-models-setup
    checksums       rmd160  14d3964b8840ecc70db938b0e8c2194ac6d3bb49 \
                    sha256  22221e19ca2d8d4ac061d900b6432d6c2fc9b030a647d91d8b8cc9954b59053b \
                    size    1016572179
}

# English models; distfile ${name}-${version}-models-english.jar (about 1 GB,
# per the size below).
subport ${name}-english {
    stanford-corenl-models-setup
    checksums       rmd160  827c59a32f995f2ec4c7cff3929e16b565854d34 \
                    sha256  dbd1443383d2bd77f0fa083d04db33465445b714a5532cd57abb88ff7db7580c \
                    size    1038970602
}

# English KBP models; distfile ${name}-${version}-models-english-kbp.jar.
subport ${name}-english-kbp {
    stanford-corenl-models-setup
    checksums       rmd160  b9f6ebe61dbce90eacbb4231c5192acb6e151b5b \
                    sha256  a10d22a266221f3fb34796847839a98462c1dd3e0b8ab34167768f21530278ce \
                    size    473999624
}

# French models; distfile ${name}-${version}-models-french.jar.
subport ${name}-french {
    stanford-corenl-models-setup
    checksums       rmd160  fa7fb4fc3657f23fe6c4f170c7049021b7a35f58 \
                    sha256  d2f5364cd00add931fe3346435051c091f9a005a415bbb54f8a8a67b42172c83 \
                    size    272154566
}

# German models; distfile ${name}-${version}-models-german.jar.
subport ${name}-german {
    stanford-corenl-models-setup
    checksums       rmd160  cc9ffa8d823105cbcc543249b15eb9feba9ec094 \
                    sha256  d992669d8d3a5b64f1fd92048d37e3f5936752c7408b4edf8d226a1a7ebb6e9f \
                    size    173812784
}

# Spanish models; distfile ${name}-${version}-models-spanish.jar.
subport ${name}-spanish {
    stanford-corenl-models-setup
    checksums       rmd160  1f8b30f400307e501a05cc9e871f1693b125a83e \
                    sha256  2efce4c555a75281dded366c55613481bb9063447ffba022dc8b200f64ca3c54 \
                    size    359815433
}
