import Mammoth from 'mammoth';
import { pdfjs } from 'react-pdf';

// Tell pdf.js where to load its worker script from: the unpkg CDN, via a
// protocol-relative URL, pinned to pdfjs-dist 3.11.174.
// NOTE(review): the pinned version presumably has to match the pdfjs build
// bundled with react-pdf — confirm when upgrading either package.
pdfjs.GlobalWorkerOptions.workerSrc = '//unpkg.com/pdfjs-dist@3.11.174/build/pdf.worker.js';

/**
 * Extracts the text content of a PDF file.
 *
 * The text items of each page are joined with a single space, and pages are
 * separated by a newline so that words at page boundaries are not fused.
 *
 * @param {Blob} file - PDF blob/file; it is handed to URL.createObjectURL.
 * @returns {Promise<string>} The concatenated text of all pages.
 * @throws Rejects with the underlying pdf.js error when the document cannot
 *     be loaded or a page cannot be read.
 */
const extractPDFText = async (file) => {
    const objectUrl = URL.createObjectURL(file);
    const loadingTask = pdfjs.getDocument(objectUrl);

    try {
        const pdf = await loadingTask.promise;
        const pageTexts = [];

        // pdf.js page numbers are 1-based.
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
            const page = await pdf.getPage(pageNum);
            const content = await page.getTextContent();
            pageTexts.push(content.items.map((item) => item.str).join(' '));
        }

        return pageTexts.join('\n');
    } finally {
        // Always release the blob URL and the pdf.js document/worker, on
        // success and on failure alike, so repeated uploads do not leak.
        URL.revokeObjectURL(objectUrl);
        await loadingTask.destroy();
    }
};

/**
 * Converts a Word (.docx) file with Mammoth and returns the conversion result.
 *
 * The returned string is the HTML produced by `Mammoth.convertToHtml`, i.e.
 * it contains markup rather than plain text.
 * NOTE(review): the PDF path returns plain text; confirm whether callers
 * really want HTML here or whether raw text was intended.
 *
 * @param {Blob} file - Object exposing `arrayBuffer()`, e.g. a File/Blob.
 * @returns {Promise<string>} The `value` field of Mammoth's conversion result.
 */
const extractWordText = async (file) => {
    const buffer = await file.arrayBuffer();
    const { value: html } = await Mammoth.convertToHtml({ arrayBuffer: buffer });
    return html;
};

/**
 * Dispatches to the extractor matching the given MIME type.
 *
 * @param {string} fileType - MIME type of the file; only PDF and .docx are handled.
 * @param {Blob} file - The file to read; passed through to the chosen extractor.
 * @returns {Promise<string>} Whatever the selected extractor resolves to.
 * @throws {Error} When `fileType` is neither the PDF nor the .docx MIME type.
 */
const extractText = async (fileType, file) => {
    switch (fileType) {
        case 'application/pdf':
            return await extractPDFText(file);
        case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
            return await extractWordText(file);
        default:
            throw new Error('Unsupported file type. Please upload a PDF or Word (.docx) file.');
    }
};

const TextExtractor = {
    extractText
}

export default TextExtractor;