package eu.dnetlib.iis.ingest.pmc.plaintext;

import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
import java.io.IOException;
import java.io.StringReader;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

/* loaded from: input_file:eu/dnetlib/iis/ingest/pmc/plaintext/DocumentTextImporter.class */
/**
 * Map-only Hadoop step that replaces the XML held in each {@link DocumentText}
 * record with the text extracted from it.
 *
 * <p>For every input record the XML stored in the {@code text} field is parsed
 * with JDOM and its root element is handed to
 * {@link NlmToDocumentTextConverter#getDocumentText}. A new record with the
 * same id and the extracted text is written to the output. When the input text
 * is {@code null}, or the XML cannot be parsed, the output record carries a
 * {@code null} text so that exactly one output record is produced per input.
 */
public class DocumentTextImporter extends Mapper<AvroKey<DocumentText>, NullWritable, AvroKey<DocumentText>, NullWritable> {
    private static final Logger log = Logger.getLogger(DocumentTextImporter.class);

    /**
     * Converts a single record and writes the result to {@code context}.
     *
     * @param avroKey      wrapper around the input record; its {@code text} field holds the XML to convert
     * @param nullWritable unused placeholder value
     * @param context      Hadoop context the converted record is written to
     * @throws IOException          if parsing or writing to the context fails with an I/O error
     * @throws InterruptedException if writing to the context is interrupted
     */
    protected void map(AvroKey<DocumentText> avroKey, NullWritable nullWritable, Mapper<AvroKey<DocumentText>, NullWritable, AvroKey<DocumentText>, NullWritable>.Context context) throws IOException, InterruptedException {
        DocumentText documentText = avroKey.datum();
        String str = null;
        if (documentText.getText() != null) {
            try {
                SAXBuilder saxBuilder = newSaxBuilder();
                str = NlmToDocumentTextConverter.getDocumentText(
                        saxBuilder.build(new StringReader(documentText.getText().toString())).getRootElement());
                log.info("Text extracted for id: " + documentText.getId());
            } catch (JDOMException e) {
                // Best effort: a malformed document must not fail the whole job.
                // The record is still emitted (with null text); the exception is
                // passed to the logger so the stack trace and cause are preserved.
                log.error("Text extraction failed for id " + documentText.getId() + " :" + e.getMessage(), e);
            }
        }
        DocumentText converted = DocumentText.newBuilder()
                .setId(documentText.getId())
                .setText(str)
                .build();
        context.write(new AvroKey<DocumentText>(converted), NullWritable.get());
    }

    /**
     * Creates a non-validating JDOM builder that never reaches outside the
     * document being parsed: DTD validation, external DTD loading and external
     * entity resolution are all switched off.
     */
    private static SAXBuilder newSaxBuilder() {
        SAXBuilder saxBuilder = new SAXBuilder();
        saxBuilder.setValidation(false);
        saxBuilder.setFeature("http://xml.org/sax/features/validation", false);
        saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // XXE hardening: input XML comes from outside this job, so entities declared
        // in an internal DTD subset must not be allowed to pull in local files or URLs.
        saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
        saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        return saxBuilder;
    }

    // The synthetic bridge method map(Object, Object, Mapper.Context) that a decompiler
    // emits for this class is intentionally not declared here: javac generates it
    // automatically, and declaring it by hand causes a name clash with Mapper.map.
}
