167 lines
3.7 KiB
Go
167 lines
3.7 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/bitfield/script"
|
|
gonanoid "github.com/matoous/go-nanoid"
|
|
"log"
|
|
"math/rand/v2"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// maxWordLength is the inclusive upper bound on the length (len, i.e. bytes)
// of a word that main keeps.
const maxWordLength = 7
|
|
|
|
// validWordRegex matches strings consisting solely of one or more lowercase
// ASCII letters (a-z); anything containing capitals, digits, punctuation or
// whitespace does not match.
var validWordRegex = regexp.MustCompile(`^[a-z]+$`)
|
|
|
|
// wordList is the document main serialises to words.json.
type wordList struct {
	// Words groups the accepted words by their length: the key is len(word)
	// and the value is the list of words of that length.
	Words map[int][]string `json:"words"`
}
|
|
|
|
// shufflePattern is the document main serialises to shuffle_pattern.json.
type shufflePattern struct {
	// ID is a generated identifier for this particular pattern.
	ID string `json:"id"`

	// Index maps a word length to a shuffled list of indices into the word
	// list of that length.
	Index map[int][]int `json:"index"`
}
|
|
|
|
func main() {
|
|
dictFile := flag.String("dict", "./dict", "directory of dictionary of word to prep")
|
|
outDir := flag.String("out", "./site/assets/data", "output directory")
|
|
flag.Parse()
|
|
|
|
r := rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), uint64(time.Now().UnixNano())))
|
|
|
|
words := wordList{
|
|
Words: make(map[int][]string),
|
|
}
|
|
|
|
wordSet := make(map[string]bool)
|
|
if err := scanSuitableWords(*dictFile, func(word string) {
|
|
w := strings.TrimSpace(word)
|
|
if !validWordRegex.MatchString(w) {
|
|
return
|
|
}
|
|
|
|
if len(w) >= 4 && len(w) <= maxWordLength {
|
|
wordSet[word] = true
|
|
}
|
|
}); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
for w := range wordSet {
|
|
words.Words[len(w)] = append(words.Words[len(w)], w)
|
|
}
|
|
|
|
for k, word := range words.Words {
|
|
log.Printf("Found %d words of length %v", len(word), k)
|
|
sort.Strings(word)
|
|
}
|
|
|
|
var wordData bytes.Buffer
|
|
if err := json.NewEncoder(&wordData).Encode(words); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(*outDir, "words.json"), wordData.Bytes(), 0644); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
// Generate a shuffle pattern
|
|
shp := shufflePattern{
|
|
ID: gonanoid.MustID(12),
|
|
Index: make(map[int][]int),
|
|
}
|
|
for k := range words.Words {
|
|
pattern := make([]int, len(words.Words[k]))
|
|
for i := range words.Words[k] {
|
|
pattern[i] = i
|
|
}
|
|
|
|
for x := 12; x < r.IntN(8)+16; x++ {
|
|
r.Shuffle(len(pattern), func(i, j int) {
|
|
pattern[i], pattern[j] = pattern[j], pattern[i]
|
|
})
|
|
}
|
|
|
|
// TODO: shuffle
|
|
shp.Index[k] = pattern
|
|
}
|
|
|
|
var patternData bytes.Buffer
|
|
if err := json.NewEncoder(&patternData).Encode(shp); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(*outDir, "shuffle_pattern.json"), patternData.Bytes(), 0644); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
func scanSuitableWords(dictDir string, withWord func(word string)) error {
|
|
if err := scanSuitableWordsFromWordListHTML(filepath.Join(dictDir, "oxford-word-list.htm"), withWord); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := scanSuitableWordsFromOxford3000(filepath.Join(dictDir, "The_Oxford_3000.txt"), withWord); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := scanSuitableWordsFromEnGB(filepath.Join(dictDir, "en_GB.dic"), filepath.Join(dictDir, "en_GB.aff"), withWord); err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func scanSuitableWordsFromWordListHTML(dictFile string, withWord func(word string)) error {
|
|
f, err := os.Open(dictFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
dom, err := goquery.NewDocumentFromReader(f)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
dom.Find("table.t tbody tr td.t:nth-child(2)").Each(func(i int, s *goquery.Selection) {
|
|
withWord(s.Text())
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// scanSuitableWordsFromOxford3000 reads dictFile line by line and calls
// withWord once per line, in file order, with the line's text (line
// terminator removed, otherwise untouched — blank lines are passed on too).
//
// It returns an error if the file cannot be opened or if reading it fails.
func scanSuitableWordsFromOxford3000(dictFile string, withWord func(word string)) error {
	file, openErr := os.Open(dictFile)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	lines := bufio.NewScanner(file)
	for more := lines.Scan(); more; more = lines.Scan() {
		withWord(lines.Text())
	}

	// Scan stops on both EOF and failure; Err tells the two apart.
	return lines.Err()
}
|
|
|
|
func scanSuitableWordsFromEnGB(dictFile, affFile string, withWord func(word string)) error {
|
|
words, err := script.Exec(fmt.Sprintf("unmunch '%v' '%v'", dictFile, affFile)).String()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, word := range strings.Split(words, "\n") {
|
|
withWord(word)
|
|
}
|
|
return nil
|
|
}
|