/*
 * Decompiled with CFR 0.152.
 */
package net.kafujo.samples.wikidata;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

/**
 * Removes duplicate rows from a raw CSV export, keyed on the first column, and
 * strips the Wikidata entity URL prefix from the rows that are kept.
 */
public class WikidataCityCleaner {
    /** URL prefix removed from every retained line. */
    private static final String ENTITY_PREFIX = "http://www.wikidata.org/entity/";

    /**
     * Reads all lines of {@code raw} and keeps only the first line seen for each
     * distinct first-column value. Every occurrence of the Wikidata entity URL
     * prefix is removed from the retained lines. The total, ignored and unique
     * line counts are printed to standard output.
     *
     * <p>A line without a comma (including an empty line) is treated as a
     * single-column row: the whole line is its key.
     *
     * @param raw path of the CSV file to read
     * @return the retained lines, in the order they first appear in the file
     * @throws IOException if the file cannot be read
     */
    public static List<String> cleanRawCvs(Path raw) throws IOException {
        var seenKeys = new HashSet<String>();
        var cleaned = new LinkedList<String>();
        int ignoreCount = 0;
        int totalCount = 0;
        for (String line : Files.readAllLines(raw)) {
            ++totalCount;
            // The key is taken from the unmodified line, i.e. before the prefix is stripped.
            if (seenKeys.add(firstColumnOf(line))) {
                cleaned.add(line.replace(ENTITY_PREFIX, ""));
            } else {
                ++ignoreCount;
            }
        }
        System.out.println("TOTAL   : " + totalCount);
        System.out.println("IGNORED : " + ignoreCount);
        System.out.println("UNIQUE  :  " + cleaned.size());
        assert seenKeys.size() == cleaned.size();
        return cleaned;
    }

    /** Returns the text before the first comma, or the whole line when it contains no comma. */
    private static String firstColumnOf(String line) {
        int comma = line.indexOf(',');
        // indexOf yields -1 when there is no comma; substring(0, -1) would throw.
        return comma < 0 ? line : line.substring(0, comma);
    }

    /**
     * Cleans {@code /opt/tmp/query.csv} and writes the result to
     * {@code /opt/tmp/cleaned.csv}.
     *
     * @param args ignored
     * @throws IOException if reading the input or writing the output fails
     */
    public static void main(String[] args) throws IOException {
        List<String> cleaned = WikidataCityCleaner.cleanRawCvs(Path.of("/opt/tmp/query.csv"));
        Path dest = Path.of("/opt/tmp/cleaned.csv");
        Files.write(dest, cleaned);
    }
}

