<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <!-- Maven coordinates (groupId:artifactId:version, packaged as a jar)
       followed by the descriptive project metadata. -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>io.archivesunleashed</groupId>
  <artifactId>aut</artifactId>
  <version>1.2.0</version>
  <packaging>jar</packaging>

  <name>Archives Unleashed Toolkit</name>
  <description>An open-source toolkit for analyzing web archives.</description>
  <url>https://github.com/archivesunleashed/aut</url>
  <inceptionYear>2017</inceptionYear>

  <!-- Owning organization. Its name is read as ${project.organization.name}
       by the license-maven-plugin configuration in this POM (the "owner"
       property). -->
  <organization>
    <name>The Archives Unleashed Project</name>
    <url>https://archivesunleashed.org/</url>
  </organization>

  <!-- Central place for encodings and version numbers; every value below is
       referenced elsewhere in this POM as ${property.name} unless noted. -->
  <properties>
    <!-- Encodings for sources and generated reports. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

    <!-- NOTE(review): not referenced anywhere else in this POM; confirm it is
         still needed (for example by filtered resources). -->
    <project.name>${project.artifactId}</project.name>
    <!-- Presumably the settings.xml server id read by the GitHub site plugin;
         verify against that plugin's documentation. -->
    <github.global.server>github</github.global.server>

    <!-- Language and platform versions. scala.binary.version is the suffix
         used in Scala artifact ids (for example scalatest_2.12). -->
    <scala.version>2.12.10</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <hadoop.version>2.7.4</hadoop.version>
    <spark.version>3.0.1</spark.version>

    <!-- Library versions used by the dependency declarations. -->
    <guava.version>29.0-jre</guava.version>
    <jackson.version>2.10.0</jackson.version>
    <tika.version>1.23</tika.version>

    <!-- Versions of plugins (and plugin dependencies) used in the build,
         reporting and release-signing sections. -->
    <build-helper.plugin.version>3.0.0</build-helper.plugin.version>
    <changelog.plugin.version>2.3</changelog.plugin.version>
    <deploy.plugin.version>2.8.2</deploy.plugin.version>
    <doxia-markdown.plugin.version>1.7</doxia-markdown.plugin.version>
    <failsafe.plugin.version>2.22.0</failsafe.plugin.version>
    <github-site.plugin.version>0.12</github-site.plugin.version>
    <gpg.plugin.version>1.6</gpg.plugin.version>
    <jacoco.plugin.version>0.8.4</jacoco.plugin.version>
    <jxr.plugin.version>2.5</jxr.plugin.version>
    <license.plugin.version>3.0</license.plugin.version>
    <project-info-reports.plugin.version>2.7</project-info-reports.plugin.version>
    <release.plugin.version>2.5.2</release.plugin.version>
    <scala.maven.plugin.version>4.5.4</scala.maven.plugin.version>
    <scm-provider-gitexe.plugin.version>1.9.5</scm-provider-gitexe.plugin.version>
    <site.plugin.version>3.3</site.plugin.version>
    <surefire.plugin.version>2.22.0</surefire.plugin.version>
    <versions.plugin.version>2.1</versions.plugin.version>
  </properties>

  <!-- License under which the project is published (Apache License 2.0).
       A distribution of "repo" means the artifact may be downloaded from a
       Maven repository. -->
  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <!-- Source control coordinates.
       connection:          read-only access; uses HTTPS so that no SSH key is
                            required to clone the public repository.
       developerConnection: read-write access over SSH (the endpoint used
                            when committing and tagging a release).
       url:                 browsable web view of the repository.
       tag:                 tag this version of the POM was released under. -->
  <scm>
    <connection>scm:git:https://github.com/archivesunleashed/aut.git</connection>
    <developerConnection>scm:git:git@github.com:archivesunleashed/aut.git</developerConnection>
    <url>https://github.com/archivesunleashed/aut</url>
    <tag>aut-1.2.0</tag>
  </scm>

  <!-- Repositories consulted when resolving dependencies: Maven Central,
       then JitPack.
       NOTE(review): the com.github.* dependencies in this POM
       (netarchivesuite language-detector, internetarchive Sparkling)
       presumably resolve from JitPack; confirm. -->
  <repositories>
    <repository>
      <id>maven</id>
      <url>https://repo.maven.apache.org/maven2/</url>
    </repository>
    <repository>
      <id>jitpack.io</id>
      <url>https://jitpack.io</url>
    </repository>
  </repositories>

  <build>
    <plugins>
      <!-- Java compilation: Java 11, debug information enabled, and javac lint
           warnings for unchecked and deprecated usage.
           NOTE(review): release, source and target are all set to 11; when
           release is set, source/target are presumably redundant. The Scala
           compiler configured further down is passed -target:jvm-1.8; confirm
           that the differing bytecode targets are intended. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.0</version>
        <configuration>
          <debug>true</debug>
          <release>11</release>
          <source>11</source>
          <target>11</target>
          <compilerArgument>-Xlint:unchecked,deprecation</compilerArgument>
        </configuration>
      </plugin>
      <!-- Builds the "fatjar" artifact during the package phase. The regular
           thin jar stays the main artifact; the shaded jar is attached with
           the classifier "fatjar". -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.2.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <!-- Append (rather than overwrite) the Lucene Codec service
                   registration files found in the bundled jars. -->
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>META-INF/services/org.apache.lucene.codecs.Codec</resource>
                </transformer>
              </transformers>
              <!-- Move Guava classes into a private package inside the fat jar.
                   Presumably this avoids clashing with the Guava version on
                   the Spark/Hadoop runtime classpath; verify. -->
              <relocations>
                <relocation>
                  <pattern>com.google.common.</pattern>
                  <shadedPattern>com.google.common.shaded.</shadedPattern>
                </relocation>
              </relocations>
              <!-- This fixes the issue "Invalid signature file digest for Manifest main attributes"
                   cf. http://zhentao-li.blogspot.com/2012/06/maven-shade-plugin-invalid-signature.html -->
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <!-- This will create both a normal thin jar and also a fatjar. -->
              <shadedArtifactAttached>true</shadedArtifactAttached>
              <shadedClassifierName>fatjar</shadedClassifierName>
              <!-- Hadoop and Spark artifacts are left out of the fat jar;
                   presumably they are supplied by the cluster at run time. -->
              <artifactSet>
                <excludes>
                  <exclude>org.apache.hadoop:hadoop-core</exclude>
                  <exclude>org.apache.hadoop:hadoop-common</exclude>
                  <exclude>org.apache.hadoop:hadoop-mapreduce-client-core</exclude>
                  <exclude>org.apache.spark:*</exclude>
                </excludes>
              </artifactSet>
            </configuration>
          </execution>
        </executions>
      </plugin>

      <!-- For Scala: compiles main and test Scala sources and attaches a
           Scaladoc jar. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>${scala.maven.plugin.version}</version>
        <executions>
          <!-- Main sources are compiled in process-resources, i.e. before the
               standard compile phase runs. -->
          <execution>
            <phase>process-resources</phase>
            <goals>
              <goal>add-source</goal>
              <goal>compile</goal>
            </goals>
          </execution>
          <!-- Test sources are compiled in process-test-resources, i.e. before
               the standard test-compile phase runs. -->
          <execution>
            <id>scala-test-compile</id>
            <phase>process-test-resources</phase>
            <goals>
              <goal>testCompile</goal>
            </goals>
          </execution>
          <!-- Scaladoc jar is built during verify. -->
          <execution>
            <id>attach-scaladocs</id>
            <phase>verify</phase>
            <goals>
              <goal>doc-jar</goal>
            </goals>
            <configuration>
              <args>
                <arg>-no-java-comments</arg>
                <arg>-no-link-warnings</arg>
              </args>
            </configuration>
          </execution>
        </executions>
        <!-- Plugin-wide configuration shared by the executions above. -->
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <!-- Fail the build if dependencies bring in more than one Scala version. -->
          <checkMultipleScalaVersions>true</checkMultipleScalaVersions>
          <failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
          <sendJavaToScalac>true</sendJavaToScalac>
          <!-- scalac flags. NOTE(review): the target is JVM 1.8 while the Java
               compiler plugin in this POM is set to release 11; confirm. -->
          <args>
            <arg>-unchecked</arg>
            <arg>-deprecation</arg>
            <arg>-feature</arg>
            <arg>-explaintypes</arg>
            <arg>-target:jvm-1.8</arg>
            <arg>-Ywarn-unused-import</arg>
          </args>
          <!-- SemanticDB compiler plugin; the artifact id is suffixed with the
               full Scala version. Presumably required by the scalafix plugin
               declared later in this build section; verify. -->
          <compilerPlugins>
            <compilerPlugin>
              <groupId>org.scalameta</groupId>
              <artifactId>semanticdb-scalac_${scala.version}</artifactId>
              <version>4.6.0</version>
            </compilerPlugin>
          </compilerPlugins>
        </configuration>
      </plugin>
      <!-- For license header enforcement: runs the "check" goal against the
           Scala sources using the template in config/LICENSE_HEADER.txt. -->
      <plugin>
        <groupId>com.mycila</groupId>
        <artifactId>license-maven-plugin</artifactId>
        <version>${license.plugin.version}</version>
        <configuration>
          <header>config/LICENSE_HEADER.txt</header>
          <!-- .scala files use slash-star style comment headers. -->
          <mapping>
            <scala>SLASHSTAR_STYLE</scala>
          </mapping>
          <!-- Only main and test Scala sources are checked. -->
          <includes>
            <include>src/main/scala/**</include>
            <include>src/test/scala/**</include>
          </includes>
          <excludes>
            <exclude>target/**</exclude>
            <exclude>src/test/resources/**</exclude>
            <exclude>src/main/resources/**</exclude>
            <exclude>**/*.properties</exclude>
          </excludes>
          <!-- "owner" is taken from the organization name declared in this POM. -->
          <properties>
            <owner>${project.organization.name}</owner>
          </properties>
          <encoding>UTF-8</encoding>
          <strictCheck>true</strictCheck>
        </configuration>
        <executions>
          <execution>
            <goals>
              <goal>check</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <!-- Release automation for aut; Git access goes through the gitexe SCM
           provider declared as a plugin dependency. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-release-plugin</artifactId>
        <version>${release.plugin.version}</version>
        <dependencies>
          <dependency>
            <groupId>org.apache.maven.scm</groupId>
            <artifactId>maven-scm-provider-gitexe</artifactId>
            <version>${scm-provider-gitexe.plugin.version}</version>
          </dependency>
        </dependencies>
        <configuration>
          <!-- see http://jira.codehaus.org/browse/MRELEASE-424 -->
          <mavenExecutorId>forked-path</mavenExecutorId>
        </configuration>
      </plugin>
      <!-- GitHub Pages: this and the following site plugins handle publishing.
           The deploy plugin itself is only pinned to a fixed version here. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-deploy-plugin</artifactId>
        <version>${deploy.plugin.version}</version>
      </plugin>
      <!-- Site generation. The site plugin's own deploy step is skipped
           (skipDeploy=true). -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-site-plugin</artifactId>
        <version>${site.plugin.version}</version>
        <dependencies>
          <!-- Allows markdown syntax for site generation. To use it
               place files below src/site/markdown/[filename].md -->
          <dependency>
            <groupId>org.apache.maven.doxia</groupId>
            <artifactId>doxia-module-markdown</artifactId>
            <version>${doxia-markdown.plugin.version}</version>
          </dependency>
        </dependencies>
        <configuration>
          <skipDeploy>true</skipDeploy>
        </configuration>
      </plugin>
      <!-- Publishes the generated site to GitHub during the site-deploy phase.
           The target path comes from distributionManagement/site/url, which
           this POM sets to ${project.version}. Test cross-reference and test
           API doc directories are excluded from the upload. -->
      <plugin>
        <groupId>com.github.github</groupId>
        <artifactId>site-maven-plugin</artifactId>
        <version>${github-site.plugin.version}</version>
        <configuration>
          <message>Creating site for ${project.artifactId}, ${project.version}</message>
          <path>${project.distributionManagement.site.url}</path>
          <!-- Presumably keeps previously published content instead of
               replacing the whole branch; verify against the plugin docs. -->
          <merge>true</merge>
          <excludes>
            <exclude>xref-test/**</exclude>
            <exclude>testapidocs/**</exclude>
          </excludes>
        </configuration>
        <executions>
          <execution>
            <id>github</id>
            <goals>
              <goal>site</goal>
            </goals>
            <phase>site-deploy</phase>
          </execution>
        </executions>
      </plugin>
      <!-- Changelog plugin, pinned to a fixed version; no executions or
           configuration are declared here. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-changelog-plugin</artifactId>
        <version>${changelog.plugin.version}</version>
      </plugin>
      <!-- Build helper plugin, pinned to a fixed version.
           NOTE(review): no executions or goals are bound here, so the
           declaration appears to only fix the version; confirm it is still
           needed. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <version>${build-helper.plugin.version}</version>
      </plugin>
      <!-- Creates a zip of the PySpark modules during the package phase, as
           described by src/main/assembly/python.xml. The archive is named
           "aut" without the assembly id appended. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.6</version>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <finalName>aut</finalName>
          <appendAssemblyId>false</appendAssemblyId>
          <descriptors>
            <descriptor>src/main/assembly/python.xml</descriptor>
          </descriptors>
        </configuration>
      </plugin>
      <!-- Code coverage with JaCoCo. -->
      <plugin>
        <groupId>org.jacoco</groupId>
        <artifactId>jacoco-maven-plugin</artifactId>
        <version>${jacoco.plugin.version}</version>
        <executions>
          <!-- Sets up the coverage agent (goal's default phase binding). -->
          <execution>
            <goals>
              <goal>prepare-agent</goal>
            </goals>
          </execution>
          <!-- Generates the coverage report in the test phase. -->
          <execution>
            <id>report</id>
            <phase>test</phase>
            <goals>
              <goal>report</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <!-- Scalafix plugin; the artifact id is suffixed with the Scala binary
           version. No executions are bound, so it is presumably invoked
           explicitly from the command line; verify. -->
      <plugin>
        <groupId>io.github.evis</groupId>
        <artifactId>scalafix-maven-plugin_${scala.binary.version}</artifactId>
        <version>0.1.7_0.10.4</version>
      </plugin>
    </plugins>
  </build>

  <reporting>
    <plugins>
      <!-- NOTE(review): failsafe is declared under reporting rather than
           build; confirm that this argLine actually reaches the JVMs that run
           integration tests. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-failsafe-plugin</artifactId>
        <version>${failsafe.plugin.version}</version>
        <configuration>
          <argLine>--illegal-access=permit</argLine>
        </configuration>
      </plugin>
      <!-- Source cross-reference (JXR) report. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jxr-plugin</artifactId>
        <version>${jxr.plugin.version}</version>
      </plugin>
      <!-- Aggregated test report built from both the surefire and failsafe
           result directories, written as "surefire-report". -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-report-plugin</artifactId>
        <version>${surefire.plugin.version}</version>
        <configuration>
          <!-- A configuration element may appear only once: a repeated
               argLine is silently overridden by the later one, so a single
               value is kept.
               NOTE(review): argLine is a test-execution parameter; confirm
               the report plugin honours it at all. -->
          <argLine>--illegal-access=permit</argLine>
          <outputName>surefire-report</outputName>
          <aggregate>true</aggregate>
          <reportsDirectories>
            <reportsDirectory>${project.build.directory}/surefire-reports/</reportsDirectory>
            <reportsDirectory>${project.build.directory}/failsafe-reports/</reportsDirectory>
          </reportsDirectories>
        </configuration>
      </plugin>
      <!-- Standard project information reports, with dependency location and
           dependency detail lookups turned off. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-project-info-reports-plugin</artifactId>
        <version>${project-info-reports.plugin.version}</version>
        <configuration>
          <dependencyDetailsEnabled>false</dependencyDetailsEnabled>
          <dependencyLocationsEnabled>false</dependencyLocationsEnabled>
        </configuration>
      </plugin>
      <!-- Reports on available updates for dependencies, plugins and
           version properties. -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>versions-maven-plugin</artifactId>
        <version>${versions.plugin.version}</version>
        <reportSets>
          <reportSet>
            <reports>
              <report>dependency-updates-report</report>
              <report>plugin-updates-report</report>
              <report>property-updates-report</report>
            </reports>
          </reportSet>
        </reportSets>
      </plugin>
      <!-- Scala plugin as a site report, using the same Scaladoc arguments as
           the attach-scaladocs execution in the build section. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>${scala.maven.plugin.version}</version>
        <configuration>
          <args>
            <arg>-no-java-comments</arg>
            <arg>-no-link-warnings</arg>
          </args>
        </configuration>
      </plugin>
    </plugins>
  </reporting>

  <dependencies>
    <!-- JUnit is a test framework: test scope keeps it off the compile and
         runtime classpaths and out of the shaded fat jar. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.13.1</version>
      <scope>test</scope>
    </dependency>
    <!-- Scala test framework (test scope only). Scala artifacts carry the
         Scala binary version as an artifact id suffix. -->
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.binary.version}</artifactId>
      <version>3.0.8</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.scala-lang.modules</groupId>
      <artifactId>scala-parser-combinators_${scala.binary.version}</artifactId>
      <version>1.1.2</version>
    </dependency>
    <!-- Scala standard library, matching the compiler version used by the
         scala-maven-plugin. -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <!-- Apache Commons utilities. -->
    <dependency>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
      <version>1.12</version>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-compress</artifactId>
      <version>1.21</version>
    </dependency>
    <!-- Guava; relocated to com.google.common.shaded inside the fat jar by
         the shade plugin configuration in this POM. -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>${guava.version}</version>
    </dependency>
    <!-- Hadoop client libraries, with the transitive javax.servlet
         servlet-api excluded. Both artifacts are also excluded from the fat
         jar by the shade plugin configuration in this POM. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>${hadoop.version}</version>
      <exclusions>
        <exclusion>
          <groupId>javax.servlet</groupId>
          <artifactId>servlet-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
      <exclusions>
        <exclusion>
          <groupId>javax.servlet</groupId>
          <artifactId>servlet-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <!-- Spark core and SQL for the configured Scala binary version. All
         org.apache.spark artifacts are excluded from the fat jar by the
         shade plugin configuration in this POM. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.xerial.snappy</groupId>
      <artifactId>snappy-java</artifactId>
      <version>1.1.7.3</version>
    </dependency>
    <!-- HTML parsing. -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.15.3</version>
    </dependency>
    <!-- Web archive commons library. Its transitive hadoop-core, Apache
         HttpComponents and joda-time dependencies are excluded, presumably
         to avoid conflicts with versions brought in elsewhere; verify. -->
    <dependency>
      <groupId>org.netpreserve.commons</groupId>
      <artifactId>webarchive-commons</artifactId>
      <version>1.1.9</version>
      <exclusions>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-core</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.httpcomponents</groupId>
          <artifactId>httpcore</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.httpcomponents</groupId>
          <artifactId>httpclient</artifactId>
        </exclusion>
        <exclusion>
          <groupId>joda-time</groupId>
          <artifactId>joda-time</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <!-- Apache Tika modules, all pinned to the same tika.version. -->
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-core</artifactId>
      <version>${tika.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-parsers</artifactId>
      <version>${tika.version}</version>
    </dependency>
    <!-- Language detection. The bundled optimaize language-detector is
         excluded; this POM separately declares the
         com.github.netarchivesuite language-detector artifact. Guava is
         excluded as well (it is declared directly in this POM), along with
         jakarta.activation. -->
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-langdetect</artifactId>
      <version>${tika.version}</version>
      <exclusions>
        <exclusion>
          <groupId>com.optimaize.languagedetector</groupId>
          <artifactId>language-detector</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.google.guava</groupId>
          <artifactId>guava</artifactId>
        </exclusion>
        <exclusion>
          <groupId>com.sun.activation</groupId>
          <artifactId>jakarta.activation</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <!-- Scallop (Scala artifact, suffixed with the Scala binary version). -->
    <dependency>
      <groupId>org.rogach</groupId>
      <artifactId>scallop_${scala.binary.version}</artifactId>
      <version>3.3.1</version>
    </dependency>
    <dependency>  <!-- Needed for running boilerpipe, but will compile without. -->
      <groupId>com.syncthemall</groupId>
      <artifactId>boilerpipe</artifactId>
      <version>1.2.2</version>
    </dependency>
    <dependency>  <!-- Needed for running boilerpipe. -->
      <groupId>xerces</groupId>
      <artifactId>xercesImpl</artifactId>
      <version>2.12.2</version>
    </dependency>
    <dependency>
      <groupId>tl.lin</groupId>
      <artifactId>lintools-datatypes</artifactId>
      <version>1.1.1</version>
    </dependency>
    <!-- language-detector from com.github.netarchivesuite; presumably
         resolved via the JitPack repository declared in this POM. It stands
         in for the optimaize artifact excluded from tika-langdetect. -->
    <!--START pull #321-->
    <dependency>
      <groupId>com.github.netarchivesuite</groupId>
      <artifactId>language-detector</artifactId>
      <version>language-detector-0.6a</version>
    </dependency>
    <!--END pull #321-->
    <!-- Hadoop AWS module, same version as the other Hadoop artifacts. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-aws</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <!-- Sparkling from com.github.internetarchive, pinned to a specific
         build identifier; presumably resolved via JitPack. -->
    <dependency>
      <groupId>com.github.internetarchive</groupId>
      <artifactId>Sparkling</artifactId>
      <version>main-f002a0509e-1</version>
    </dependency>
  </dependencies>

  <!-- Project developers. Email addresses are written in "at / dot" form
       rather than as plain addresses; timezone is the numeric offset
       given for each developer. -->
  <developers>
    <developer>
      <name>Jimmy Lin</name>
      <id>lintool</id>
      <email>jimmylin at uwaterloo dot ca</email>
      <organization>University of Waterloo</organization>
      <url>https://cs.uwaterloo.ca/~jimmylin/</url>
      <timezone>-5</timezone>
    </developer>
    <developer>
      <name>Ian Milligan</name>
      <id>ianmilligan1</id>
      <email>i2millig at uwaterloo dot ca</email>
      <organization>University of Waterloo</organization>
      <url>http://ianmilligan.ca/</url>
      <timezone>-5</timezone>
    </developer>
    <developer>
      <name>Nick Ruest</name>
      <id>ruebot</id>
      <email>ruestn at yorku dot ca</email>
      <organization>York University</organization>
      <url>https://ruebot.net</url>
      <timezone>-5</timezone>
    </developer>
  </developers>

  <!-- Issues are tracked on the project's GitHub repository. -->
  <issueManagement>
    <system>GitHub</system>
    <url>https://github.com/archivesunleashed/aut/issues</url>
  </issueManagement>

  <profiles>
    <!-- Signs the build artifacts with GPG during the verify phase. The
         profile is active only when the performRelease property is "true". -->
    <profile>
      <id>release-sign-artifacts</id>
      <activation>
        <property>
          <name>performRelease</name>
          <value>true</value>
        </property>
      </activation>
      <build>
        <!-- Managed definition: agent usage and the signing execution. -->
        <pluginManagement>
          <plugins>
            <plugin>
              <groupId>org.apache.maven.plugins</groupId>
              <artifactId>maven-gpg-plugin</artifactId>
              <version>${gpg.plugin.version}</version>
              <configuration>
                <useAgent>true</useAgent>
              </configuration>
              <executions>
                <execution>
                  <id>sign-artifacts</id>
                  <phase>verify</phase>
                  <goals>
                    <goal>sign</goal>
                  </goals>
                </execution>
              </executions>
            </plugin>
          </plugins>
        </pluginManagement>
        <!-- Activates the managed GPG plugin for this build. -->
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-gpg-plugin</artifactId>
            <version>${gpg.plugin.version}</version>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>

  <!-- Extra repositories consulted when resolving build plugins. The ids
       match those used in distributionManagement in this POM. -->
  <pluginRepositories>
    <!-- Snapshot plugins only. -->
    <pluginRepository>
      <id>sonatype-nexus-snapshots</id>
      <name>Sonatype Nexus Snapshots</name>
      <url>https://oss.sonatype.org/content/repositories/snapshots</url>
      <releases>
        <enabled>false</enabled>
      </releases>
      <snapshots>
        <enabled>true</enabled>
      </snapshots>
    </pluginRepository>

    <!-- Released plugins only. NOTE(review): the id says "staging" while
         the URL points at the releases repository; confirm this is intended. -->
    <pluginRepository>
      <id>sonatype-nexus-staging</id>
      <name>Nexus Release Repository</name>
      <url>https://oss.sonatype.org/content/repositories/releases</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </pluginRepository>
  </pluginRepositories>

  <!-- Where the site and the build artifacts are published. -->
  <distributionManagement>
    <!-- The site "url" is just the project version: the GitHub site plugin
         in this POM reads it as the path to publish the site under. -->
    <site>
      <id>gh-pages</id>
      <name>Deployment through GitHub's site deployment plugin</name>
      <url>${project.version}</url>
    </site>
    <!-- Snapshot builds go to the Sonatype snapshots repository. -->
    <snapshotRepository>
      <id>sonatype-nexus-snapshots</id>
      <name>Sonatype Nexus Snapshots</name>
      <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
    </snapshotRepository>
    <!-- Releases are uploaded to the Sonatype staging endpoint. -->
    <repository>
      <id>sonatype-nexus-staging</id>
      <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
    </repository>
  </distributionManagement>

</project>
