import {BlockType} from 'domain/Report'
import {countPunctuation, tokenizeJapaneseCharacters} from 'domain/utils/textUtils'


/**
 * Billable word count of a piece of text, punctuation included.
 *
 * The result is the number of tokens produced by `tokenizeJapaneseCharacters`
 * plus the value returned by `countPunctuation` for the same text.
 *
 * @param text - Text to measure.
 * @returns Token count plus punctuation count.
 */
export const billableWordCount = (text: string) => {
  const tokenCount = tokenizeJapaneseCharacters(text).length
  const punctuationCount = countPunctuation(text)
  return tokenCount + punctuationCount
}

/**
 * Billable word count of a piece of text, ignoring punctuation.
 *
 * Equal to the number of tokens produced by `tokenizeJapaneseCharacters`.
 *
 * @param text - Text to measure.
 * @returns Number of tokens in the text.
 */
export const billableWordCountWithoutPunctuation = (text: string) => {
  const tokens = tokenizeJapaneseCharacters(text)
  return tokens.length
}

/**
 * Counts the billable words found in the inserted/modified parts of an HTML diff.
 *
 * Only the content of `<ins>` elements whose `class` attribute value starts with
 * `diffins` or `diffmod` is considered. Each matched fragment has `removeHTMLTags()`
 * applied to it and the results are summed by `textWordCount`.
 *
 * @param htmlDiff - HTML diff markup to scan.
 * @param blockType - Type of the block the diff belongs to; for
 *   `BlockType.EXCEL_TABLE` the excel table markup is removed before counting.
 * @returns Sum of the word counts of all matched insertions (0 when there are none).
 */
export const getWordCount = (htmlDiff: string, blockType: BlockType) => {
  // NOTE(review): this pattern is greedy and not global, so it removes everything from
  // the first `<table` up to the last `</table>` in the diff. Presumably an excel-table
  // block holds a single table; confirm before tightening it.
  const countableHtml = blockType === BlockType.EXCEL_TABLE
    ? htmlDiff.replace(/<table.*data-excel-table="true"[^>]*>(.*)<\/table>/s, '')
    : htmlDiff

  // `.*?` instead of `.+?`: an empty `<ins ...></ins>` must match as an empty insertion.
  // With `.+?` the match could not end at its own closing tag and ran on to the NEXT
  // `</ins>`, so text between the two elements was wrongly counted as inserted.
  const insertionPattern = /<ins class="(diffins|diffmod)[^"]*">(.*?)<\/ins>/gs
  const insertions = Array.from(
    countableHtml.matchAll(insertionPattern),
    match => match[2].removeHTMLTags(),
  )
  return textWordCount(insertions)
}

/**
 * Adds up the billable word count of every string in the list.
 *
 * @param insertionContent - Text fragments to measure.
 * @returns Total of `billableWordCount` over all fragments; 0 for an empty list.
 */
const textWordCount = (insertionContent: string[]) => {
  let total = 0
  for (const fragment of insertionContent) {
    total += billableWordCount(fragment)
  }
  return total
}

/**
 * Sums every `data-word-count="<digits>"` attribute value found in the given HTML.
 *
 * Attributes whose value is not made up solely of one or more digits are ignored,
 * because they do not match the pattern.
 *
 * @param html - Markup containing zero or more `data-word-count` attributes.
 * @returns Total of all matched counts; 0 when nothing matches.
 */
export const getWordCountFromPreparedHTML = (html: string) => {
  const wordCountAttribute = /data-word-count="(\d+)"/g
  let total = 0
  for (const [, digits] of html.matchAll(wordCountAttribute)) {
    total += parseInt(digits, 10)
  }
  return total
}