// Command main builds the word data file from a directory of dictionary
// sources. It reads three word lists, keeps lowercase a-z words whose length
// is within [minWordLength, maxWordLength], groups them by length and writes
// the result as data.json into the output directory.
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"math/rand/v2"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/bitfield/script"
	gonanoid "github.com/matoous/go-nanoid"
)

const (
	// minWordLength is the shortest word length that is kept.
	minWordLength = 4
	// maxWordLength is the longest word length that is kept.
	maxWordLength = 7
)

// validWordRegex accepts only words made entirely of lowercase ASCII letters.
var validWordRegex = regexp.MustCompile(`^[a-z]+$`)

// dataStruct is the JSON document written to data.json.
type dataStruct struct {
	// VersionID is a freshly generated nanoid for this build of the data.
	VersionID string `json:"versionId"`
	// GuessWords holds the sorted "easy" words, keyed by word length.
	GuessWords map[int][]string `json:"guessWords"`
	// OtherWords holds the sorted non-easy words, keyed by word length.
	OtherWords map[int][]string `json:"otherWords"`
	// ShufflePattern holds, per word length, a random permutation of the
	// indices into the GuessWords list of that length.
	ShufflePattern map[int][]int `json:"shufflePattern"`
}

func main() {
	dictFile := flag.String("dict", "./dict", "directory of dictionary of word to prep")
	outDir := flag.String("out", "./site/assets/data", "output directory")
	flag.Parse()

	r := rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), uint64(time.Now().UnixNano())))

	// Sets are used so a word appearing in several sources is kept once.
	guessWords := make(map[string]bool)
	otherWords := make(map[string]bool)
	err := scanSuitableWords(*dictFile, func(easy bool, word string) {
		w, ok := normalizeWord(word)
		if !ok {
			return
		}
		// Store the normalized form: the raw input may carry surrounding
		// whitespace, which would corrupt both the word and its length key.
		if easy {
			guessWords[w] = true
		} else {
			otherWords[w] = true
		}
	})
	if err != nil {
		log.Fatal(err)
	}

	data := dataStruct{
		VersionID:      gonanoid.MustID(12),
		GuessWords:     groupByLength(guessWords),
		OtherWords:     groupByLength(otherWords),
		ShufflePattern: make(map[int][]int),
	}
	logWordCounts("guess", data.GuessWords)
	logWordCounts("other", data.OtherWords)

	for length, words := range data.GuessWords {
		// Perm already yields a uniformly random permutation, so a single
		// call is enough; repeated shuffling adds nothing.
		data.ShufflePattern[length] = r.Perm(len(words))
	}

	var wordData bytes.Buffer
	if err := json.NewEncoder(&wordData).Encode(data); err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(*outDir, "data.json"), wordData.Bytes(), 0644); err != nil {
		log.Fatal(err)
	}
}

// normalizeWord trims surrounding whitespace from word and reports whether the
// result is a lowercase a-z word whose length lies within
// [minWordLength, maxWordLength]. The trimmed word is returned only when ok is
// true.
func normalizeWord(word string) (w string, ok bool) {
	w = strings.TrimSpace(word)
	if !validWordRegex.MatchString(w) {
		return "", false
	}
	// The regex guarantees ASCII, so the byte length equals the letter count.
	if len(w) < minWordLength || len(w) > maxWordLength {
		return "", false
	}
	return w, true
}

// groupByLength buckets the words of set by their length and sorts each bucket
// alphabetically. The returned map is never nil.
func groupByLength(set map[string]bool) map[int][]string {
	groups := make(map[int][]string)
	for w := range set {
		groups[len(w)] = append(groups[len(w)], w)
	}
	for _, words := range groups {
		sort.Strings(words)
	}
	return groups
}

// logWordCounts logs how many words of the given kind exist for each length,
// in ascending length order so the output is stable between runs.
func logWordCounts(kind string, groups map[int][]string) {
	lengths := make([]int, 0, len(groups))
	for n := range groups {
		lengths = append(lengths, n)
	}
	sort.Ints(lengths)
	for _, n := range lengths {
		log.Printf("Found %d %s words of length %v", len(groups[n]), kind, n)
	}
}

// scanSuitableWords feeds every word found in the dictionary sources under
// dictDir to withWord. The two Oxford lists are reported as easy words, the
// expanded en_GB dictionary as non-easy words. It stops at the first source
// that fails and returns that error.
func scanSuitableWords(dictDir string, withWord func(easy bool, word string)) error {
	if err := scanSuitableWordsFromWordListHTML(filepath.Join(dictDir, "oxford-word-list.htm"), withWord); err != nil {
		return err
	}
	if err := scanSuitableWordsFromOxford3000(filepath.Join(dictDir, "The_Oxford_3000.txt"), withWord); err != nil {
		return err
	}
	return scanSuitableWordsFromEnGB(filepath.Join(dictDir, "en_GB.dic"), filepath.Join(dictDir, "en_GB.aff"), withWord)
}

// scanSuitableWordsFromWordListHTML parses dictFile as HTML and passes the
// text of the second cell of every matching table row to withWord as an easy
// word.
func scanSuitableWordsFromWordListHTML(dictFile string, withWord func(easy bool, word string)) error {
	f, err := os.Open(dictFile)
	if err != nil {
		return err
	}
	defer f.Close()

	dom, err := goquery.NewDocumentFromReader(f)
	if err != nil {
		return fmt.Errorf("parsing %q: %w", dictFile, err)
	}
	dom.Find("table.t tbody tr td.t:nth-child(2)").Each(func(_ int, s *goquery.Selection) {
		withWord(true, s.Text())
	})
	return nil
}

// scanSuitableWordsFromOxford3000 passes every line of dictFile to withWord as
// an easy word.
func scanSuitableWordsFromOxford3000(dictFile string, withWord func(easy bool, word string)) error {
	f, err := os.Open(dictFile)
	if err != nil {
		return err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		withWord(true, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading %q: %w", dictFile, err)
	}
	return nil
}

// scanSuitableWordsFromEnGB runs the external unmunch tool on dictFile and
// affFile and passes every line of its output to withWord as a non-easy word.
func scanSuitableWordsFromEnGB(dictFile, affFile string, withWord func(easy bool, word string)) error {
	// NOTE(review): the paths are embedded in single quotes; a path that
	// itself contains a single quote would presumably break the command
	// line - confirm against script.Exec's parsing rules.
	words, err := script.Exec(fmt.Sprintf("unmunch '%v' '%v'", dictFile, affFile)).String()
	if err != nil {
		return fmt.Errorf("running unmunch on %q: %w", dictFile, err)
	}
	for _, word := range strings.Split(words, "\n") {
		withWord(false, word)
	}
	return nil
}