wordle-clone/cmd/prep-words/main.go

168 lines
3.9 KiB
Go
Raw Normal View History

2025-01-24 22:45:55 +00:00
package main
import (
"bufio"
2025-01-25 00:30:04 +00:00
"bytes"
2025-01-24 22:45:55 +00:00
"encoding/json"
"flag"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/bitfield/script"
gonanoid "github.com/matoous/go-nanoid"
2025-01-24 22:45:55 +00:00
"log"
2025-01-25 00:30:04 +00:00
"math/rand/v2"
2025-01-24 22:45:55 +00:00
"os"
2025-01-25 00:30:04 +00:00
"path/filepath"
"regexp"
2025-01-24 22:45:55 +00:00
"sort"
"strings"
2025-01-25 00:30:04 +00:00
"time"
2025-01-24 22:45:55 +00:00
)
// maxWordLength is the upper bound (inclusive) on the length of words kept by main.
const maxWordLength = 7
// validWordRegex accepts only strings made up entirely of one or more
// lowercase ASCII letters a-z.
var validWordRegex = regexp.MustCompile(`^[a-z]+$`)
// dataStruct is the document that main serialises to data.json.
type dataStruct struct {
	// VersionID is a freshly generated random identifier for this build of
	// the data file.
	VersionID string `json:"versionId"`
	// GuessWords maps a word length to the sorted words of that length that
	// came from sources flagged as "easy".
	GuessWords map[int][]string `json:"guessWords"`
	// OtherWords maps a word length to the sorted words of that length that
	// came from sources not flagged as "easy".
	OtherWords map[int][]string `json:"otherWords"`
	// ShufflePattern maps a word length to a random permutation of the
	// indices of GuessWords for that length.
	ShufflePattern map[int][]int `json:"shufflePattern"`
}
2025-01-24 22:45:55 +00:00
func main() {
dictFile := flag.String("dict", "./dict", "directory of dictionary of word to prep")
2025-01-25 00:30:04 +00:00
outDir := flag.String("out", "./site/assets/data", "output directory")
2025-01-24 22:45:55 +00:00
flag.Parse()
2025-01-25 00:30:04 +00:00
r := rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), uint64(time.Now().UnixNano())))
data := dataStruct{
VersionID: gonanoid.MustID(12),
GuessWords: make(map[int][]string),
OtherWords: make(map[int][]string),
ShufflePattern: make(map[int][]int),
2025-01-24 22:45:55 +00:00
}
guessWords := make(map[string]bool)
otherWords := make(map[string]bool)
if err := scanSuitableWords(*dictFile, func(easy bool, word string) {
w := strings.TrimSpace(word)
if !validWordRegex.MatchString(w) {
return
}
if len(w) >= 4 && len(w) <= maxWordLength {
if easy {
guessWords[word] = true
} else {
otherWords[word] = true
}
2025-01-24 22:45:55 +00:00
}
}); err != nil {
log.Fatal(err)
}
for w := range guessWords {
data.GuessWords[len(w)] = append(data.GuessWords[len(w)], w)
}
for w := range otherWords {
data.OtherWords[len(w)] = append(data.OtherWords[len(w)], w)
2025-01-24 22:45:55 +00:00
}
for k, word := range data.GuessWords {
log.Printf("Found %d guess words of length %v", len(word), k)
sort.Strings(word)
2025-01-25 00:30:04 +00:00
}
for k, word := range data.OtherWords {
log.Printf("Found %d other words of length %v", len(word), k)
sort.Strings(word)
2025-01-25 00:30:04 +00:00
}
for k := range data.GuessWords {
pattern := make([]int, len(data.GuessWords[k]))
for i := range data.GuessWords[k] {
2025-01-25 00:30:04 +00:00
pattern[i] = i
}
for x := 12; x < r.IntN(8)+16; x++ {
2025-01-25 00:30:04 +00:00
r.Shuffle(len(pattern), func(i, j int) {
pattern[i], pattern[j] = pattern[j], pattern[i]
})
}
// TODO: shuffle
data.ShufflePattern[k] = pattern
2025-01-25 00:30:04 +00:00
}
var wordData bytes.Buffer
if err := json.NewEncoder(&wordData).Encode(data); err != nil {
2025-01-25 00:30:04 +00:00
log.Fatal(err)
}
if err := os.WriteFile(filepath.Join(*outDir, "data.json"), wordData.Bytes(), 0644); err != nil {
2025-01-24 22:45:55 +00:00
log.Fatal(err)
}
}
// scanSuitableWords runs every dictionary source found under dictDir, handing
// each candidate word to withWord. Sources are processed in a fixed order
// (HTML word list, Oxford 3000 text file, en_GB dictionary) and the first
// error aborts the scan and is returned unchanged.
func scanSuitableWords(dictDir string, withWord func(easy bool, word string)) error {
	inDict := func(name string) string {
		return filepath.Join(dictDir, name)
	}

	sources := []func() error{
		func() error {
			return scanSuitableWordsFromWordListHTML(inDict("oxford-word-list.htm"), withWord)
		},
		func() error {
			return scanSuitableWordsFromOxford3000(inDict("The_Oxford_3000.txt"), withWord)
		},
		func() error {
			return scanSuitableWordsFromEnGB(inDict("en_GB.dic"), inDict("en_GB.aff"), withWord)
		},
	}

	for _, scan := range sources {
		if scanErr := scan(); scanErr != nil {
			return scanErr
		}
	}
	return nil
}
// scanSuitableWordsFromWordListHTML parses the HTML document at dictFile and
// reports the text of every matching table cell (the second child of each row
// in "table.t") to withWord, flagged as easy. It returns any error from
// opening or parsing the file.
func scanSuitableWordsFromWordListHTML(dictFile string, withWord func(easy bool, word string)) error {
	// CSS selector for the cells whose text is reported.
	const wordCells = "table.t tbody tr td.t:nth-child(2)"

	src, openErr := os.Open(dictFile)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	doc, parseErr := goquery.NewDocumentFromReader(src)
	if parseErr != nil {
		return parseErr
	}

	cells := doc.Find(wordCells)
	cells.Each(func(_ int, cell *goquery.Selection) {
		withWord(true, cell.Text())
	})
	return nil
}
// scanSuitableWordsFromOxford3000 reads dictFile line by line and reports each
// line, unmodified, to withWord flagged as easy. It returns the error from
// opening the file, or any error the line scanner hit while reading.
func scanSuitableWordsFromOxford3000(dictFile string, withWord func(easy bool, word string)) error {
	file, openErr := os.Open(dictFile)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	lines := bufio.NewScanner(file)
	for more := lines.Scan(); more; more = lines.Scan() {
		withWord(true, lines.Text())
	}
	return lines.Err()
}
func scanSuitableWordsFromEnGB(dictFile, affFile string, withWord func(easy bool, word string)) error {
words, err := script.Exec(fmt.Sprintf("unmunch '%v' '%v'", dictFile, affFile)).String()
2025-01-24 22:45:55 +00:00
if err != nil {
return err
}
for _, word := range strings.Split(words, "\n") {
withWord(false, word)
2025-01-24 22:45:55 +00:00
}
return nil
2025-01-24 22:45:55 +00:00
}