package systems.dmx.pdfsearch;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.logging.Logger;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import systems.dmx.core.ChildTopics;
import systems.dmx.core.RelatedTopic;
import systems.dmx.core.Topic;
import systems.dmx.core.osgi.PluginActivator;
import systems.dmx.core.service.Inject;
import systems.dmx.core.service.event.PostCreateTopic;
import systems.dmx.files.FilesService;
import systems.dmx.tesseract.TesseractService;

/* loaded from: input_file:systems/dmx/pdfsearch/PDFSearchPlugin.class */
public class PDFSearchPlugin extends PluginActivator implements PostCreateTopic {

    @Inject
    private FilesService files;

    @Inject
    private TesseractService tesseract;
    private Logger logger = Logger.getLogger(getClass().getName());

    public void postCreateTopic(Topic topic) {
        if (topic.getTypeUri().equals("dmx.files.file")) {
            ChildTopics childTopics = topic.getChildTopics();
            String simpleValue = childTopics.getTopic("dmx.files.path").getSimpleValue().toString();
            RelatedTopic topicOrNull = childTopics.getTopicOrNull("dmx.files.media_type");
            if (topicOrNull == null) {
                this.logger.warning("No media type set for File topic " + topic.getId() + ", path=\"" + simpleValue + OperatorName.SHOW_TEXT_LINE_AND_SPACE);
            } else if (topicOrNull.getSimpleValue().toString().equals("application/pdf")) {
                this.logger.info("### Indexing PDF file \"" + simpleValue + OperatorName.SHOW_TEXT_LINE_AND_SPACE);
                indexPDF(simpleValue, topic.getId());
            }
        }
    }

    private void indexPDF(String str, long j) {
        try {
            String text = new PDFTextStripper().getText(Loader.loadPDF(this.files.getFile(str)));
            this.logger.fine(OperatorName.SHOW_TEXT_LINE_AND_SPACE + text + OperatorName.SHOW_TEXT_LINE_AND_SPACE);
            this.logger.info(text.length() + " characters extracted" + (text.length() < 100 ? "\n" + Arrays.toString(text.getBytes()) : ""));
            if (isTextAvailable(text)) {
                this.dmx.indexTopicFulltext(j, text, "dmx.files.file");
            } else {
                this.dmx.indexTopicFulltext(j, this.tesseract.doOCR(str), "dmx.files.file");
            }
        } catch (Exception e) {
            throw new RuntimeException("Indexing PDF failed, path=\"" + str + "\", File topicId=" + j, e);
        }
    }

    private boolean isTextAvailable(String str) {
        return str.chars().anyMatch(i -> {
            return i != 10;
        });
    }
}
