import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
import * as pdfjsLib from 'pdfjs-dist';
import * as Models from '../../models';

import { encode } from 'gpt-tokenizer';

// const pdfjsLib = await import('pdfjs-dist/build/pdf');
// const pdfjsWorker = await import('pdfjs-dist/build/pdf.worker.entry');

/**
 * Extracts the plain text of a PDF with pdf.js, shows it in a `<pre>` element,
 * and reports both extraction progress and the final cleaned text (with its
 * GPT token count) through outputs.
 *
 * A load starts whenever a non-empty `pdfPath` is bound or the "Load PDF"
 * button is clicked. If several loads overlap, only the most recently started
 * one is allowed to update `allText` or emit events.
 */
@Component({
  standalone: true,
  selector: 'playground-pdf-viewer',
  template: `
    <div>
      <button (click)="loadPdf(pdfPath)">Load PDF</button>
      <pre>{{ allText }}</pre>
    </div>
  `,
})
export class PdfViewerComponent implements OnInit {
  /** Location of the pdf.js worker script handed to `GlobalWorkerOptions`. */
  private static readonly workerSrc = './assets/pdfjs/pdf.worker.min.mjs';

  /** Raw text extracted so far; rendered by the template. */
  allText = '';
  _pdfPath = '';

  /** Id of the most recently started load; older loads compare against it and stop. */
  private latestLoadId = 0;

  /**
   * Path/URL of the PDF to read. Assigning a non-empty value starts a load
   * immediately.
   */
  @Input() set pdfPath(path: string) {
    this._pdfPath = path;
    if (path) {
      // Fire-and-forget: a property setter cannot await. A rejection from
      // loadPdf is intentionally not handled here.
      void this.loadPdf(path);
    }
  }

  get pdfPath(): string {
    return this._pdfPath;
  }

  /** Name copied into the `name` field of the emitted `pdfChange` payload. */
  @Input() pdfName!: string;

  ngOnInit(): void {
    this.configureWorker();
  }

  /**
   * Emitted after each page is read. `progress` is a whole-number percentage
   * of pages processed; `remainTime` is the estimated remaining time in
   * seconds, extrapolated from the average time per page so far.
   */
  @Output() readingProgress = new EventEmitter<{
    progress: number;
    remainTime: number;
  }>();

  /** Emitted once per completed load with the cleaned text and its token count. */
  @Output() pdfChange = new EventEmitter<Models.PDF>();

  /**
   * Reads every page of the PDF at `pdfPath`, appends its text to `allText`,
   * and finally emits `pdfChange` with the cleaned text and its token count.
   *
   * An empty `pdfPath` only clears `allText`. A load that is superseded by a
   * newer call stops silently without emitting `pdfChange`.
   *
   * @param pdfPath Path or URL passed to `pdfjsLib.getDocument`.
   * @returns A promise that settles when the load finishes or is superseded;
   *   it rejects if pdf.js fails to load or read the document.
   */
  async loadPdf(pdfPath: string): Promise<void> {
    const loadId = ++this.latestLoadId;
    this.allText = '';
    if (!pdfPath) {
      // Mirrors the guard in the pdfPath setter: nothing to load (e.g. the
      // button was clicked before a path was bound).
      return;
    }

    // The pdfPath setter can run before ngOnInit, so make sure the worker is
    // configured before the first document is requested.
    this.configureWorker();

    const loadingTask = pdfjsLib.getDocument(pdfPath);
    try {
      const pdf = await loadingTask.promise;
      if (!this.isCurrentLoad(loadId)) {
        return;
      }

      const numPages = pdf.numPages;
      const startTime = Date.now();
      let text = '';

      for (let i = 1; i <= numPages; i++) {
        const page = await pdf.getPage(i);
        const textContent = await page.getTextContent({
          includeMarkedContent: false,
        });
        if (!this.isCurrentLoad(loadId)) {
          // A newer load owns allText and the outputs now.
          return;
        }

        const elapsedMs = Date.now() - startTime;
        this.readingProgress.emit({
          progress: Math.floor((i / numPages) * 100),
          // Average ms per page so far * pages left, converted to seconds.
          remainTime: Math.round(((elapsedMs / i) * (numPages - i)) / 1000),
        });

        // Only items carrying a `str` property contribute text.
        const strings = textContent.items.map((item) => ('str' in item ? item.str : ''));
        text += strings.join(' ') + '\n';
        // Publish after every page so the template shows text as it arrives.
        this.allText = text;
      }

      // eslint-disable-next-line no-control-regex
      text = text.replace(/\u0000/g, '');
      this.allText = text;

      const cleanedText = this.cleanContent(text);
      this.pdfChange.emit({
        name: this.pdfName,
        content: cleanedText,
        token: encode(cleanedText).length,
      });
    } finally {
      // Release the document and its worker whether the load succeeded,
      // failed, or was superseded; all needed text is already plain strings.
      await loadingTask.destroy();
    }
  }

  /**
   * Normalizes extracted text before tokenization: removes every character
   * that is not an ASCII word character (`A-Z`, `a-z`, `0-9`, `_`),
   * whitespace, `.` or `-`; collapses whitespace runs to a single space; and
   * trims both ends.
   *
   * Note: because `\w` is ASCII-only here, accented and non-Latin letters are
   * removed as well.
   *
   * @param content Text to normalize.
   * @returns The normalized text.
   */
  cleanContent(content: string): string {
    return content
      .replace(/[^\w\s.-]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /** Points pdf.js at the worker script; safe to call repeatedly. */
  private configureWorker(): void {
    pdfjsLib.GlobalWorkerOptions.workerSrc = PdfViewerComponent.workerSrc;
  }

  /** True while no newer `loadPdf` call has started since `loadId` was issued. */
  private isCurrentLoad(loadId: number): boolean {
    return loadId === this.latestLoadId;
  }
}
