package main import ( "bufio" "bytes" "encoding/json" "flag" "fmt" "github.com/PuerkitoBio/goquery" "github.com/bitfield/script" gonanoid "github.com/matoous/go-nanoid" "log" "math/rand/v2" "os" "path/filepath" "regexp" "sort" "strings" "time" ) const maxWordLength = 7 var validWordRegex = regexp.MustCompile(`^[a-z]+$`) type wordList struct { Words map[int][]string `json:"words"` } type shufflePattern struct { ID string `json:"id"` Index map[int][]int `json:"index"` } func main() { dictFile := flag.String("dict", "./dict", "directory of dictionary of word to prep") outDir := flag.String("out", "./site/assets/data", "output directory") flag.Parse() r := rand.New(rand.NewPCG(uint64(time.Now().UnixNano()), uint64(time.Now().UnixNano()))) words := wordList{ Words: make(map[int][]string), } wordSet := make(map[string]bool) if err := scanSuitableWords(*dictFile, func(word string) { w := strings.TrimSpace(word) if !validWordRegex.MatchString(w) { return } if len(w) >= 4 && len(w) <= maxWordLength { wordSet[word] = true } }); err != nil { log.Fatal(err) } for w := range wordSet { words.Words[len(w)] = append(words.Words[len(w)], w) } for k, word := range words.Words { log.Printf("Found %d words of length %v", len(word), k) sort.Strings(word) } var wordData bytes.Buffer if err := json.NewEncoder(&wordData).Encode(words); err != nil { log.Fatal(err) } if err := os.WriteFile(filepath.Join(*outDir, "words.json"), wordData.Bytes(), 0644); err != nil { log.Fatal(err) } // Generate a shuffle pattern shp := shufflePattern{ ID: gonanoid.MustID(12), Index: make(map[int][]int), } for k := range words.Words { pattern := make([]int, len(words.Words[k])) for i := range words.Words[k] { pattern[i] = i } for x := 12; x < r.IntN(8)+16; x++ { r.Shuffle(len(pattern), func(i, j int) { pattern[i], pattern[j] = pattern[j], pattern[i] }) } // TODO: shuffle shp.Index[k] = pattern } var patternData bytes.Buffer if err := json.NewEncoder(&patternData).Encode(shp); err != nil { log.Fatal(err) } if err := os.WriteFile(filepath.Join(*outDir, "shuffle_pattern.json"), patternData.Bytes(), 0644); err != nil { log.Fatal(err) } } func scanSuitableWords(dictDir string, withWord func(word string)) error { if err := scanSuitableWordsFromWordListHTML(filepath.Join(dictDir, "oxford-word-list.htm"), withWord); err != nil { return err } if err := scanSuitableWordsFromOxford3000(filepath.Join(dictDir, "The_Oxford_3000.txt"), withWord); err != nil { return err } if err := scanSuitableWordsFromEnGB(filepath.Join(dictDir, "en_GB.dic"), filepath.Join(dictDir, "en_GB.aff"), withWord); err != nil { return err } return nil } func scanSuitableWordsFromWordListHTML(dictFile string, withWord func(word string)) error { f, err := os.Open(dictFile) if err != nil { return err } defer f.Close() dom, err := goquery.NewDocumentFromReader(f) if err != nil { return err } dom.Find("table.t tbody tr td.t:nth-child(2)").Each(func(i int, s *goquery.Selection) { withWord(s.Text()) }) return nil } func scanSuitableWordsFromOxford3000(dictFile string, withWord func(word string)) error { f, err := os.Open(dictFile) if err != nil { return err } defer f.Close() scanner := bufio.NewScanner(f) for scanner.Scan() { withWord(scanner.Text()) } return scanner.Err() } func scanSuitableWordsFromEnGB(dictFile, affFile string, withWord func(word string)) error { words, err := script.Exec(fmt.Sprintf("unmunch '%v' '%v'", dictFile, affFile)).String() if err != nil { return err } for _, word := range strings.Split(words, "\n") { withWord(word) } return nil }