Add Obsidian vault import feature (#8)
- New 'Import Obsidian' action on site settings page
- Upload a zip file of an Obsidian vault to import all notes as posts
- Markdown notes imported with title from filename, published date from file timestamp, and body with front-matter stripped
- Images and other attachments saved as Upload records
- New obsimport service handles zip traversal and import logic
- Unit tests for front-matter stripping

Co-authored-by: Shelley <shelley@exe.dev>
Co-authored-by: exe.dev user <exedev@kernel-leviathan.exe.xyz>
Reviewed-on: #8
This commit is contained in:
parent
d21aeadd56
commit
a3197f9b11
8 changed files with 376 additions and 0 deletions
229
services/obsimport/service.go
Normal file
229
services/obsimport/service.go
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
package obsimport
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"mime"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"lmika.dev/lmika/weiro/models"
|
||||
"lmika.dev/lmika/weiro/providers/db"
|
||||
"lmika.dev/lmika/weiro/providers/uploadfiles"
|
||||
"lmika.dev/lmika/weiro/services/publisher"
|
||||
)
|
||||
|
||||
type Service struct {
|
||||
db *db.Provider
|
||||
up *uploadfiles.Provider
|
||||
publisher *publisher.Queue
|
||||
scratchDir string
|
||||
}
|
||||
|
||||
func New(db *db.Provider, up *uploadfiles.Provider, publisher *publisher.Queue, scratchDir string) *Service {
|
||||
return &Service{
|
||||
db: db,
|
||||
up: up,
|
||||
publisher: publisher,
|
||||
scratchDir: scratchDir,
|
||||
}
|
||||
}
|
||||
|
||||
// ImportResult summarises what a single ImportZip call managed to import.
type ImportResult struct {
	// PostsImported counts the markdown notes saved as posts.
	PostsImported int

	// UploadsImported counts the attachments saved as uploads.
	UploadsImported int
}
|
||||
|
||||
func (s *Service) ImportZip(ctx context.Context, zipPath string) (ImportResult, error) {
|
||||
site, ok := models.GetSite(ctx)
|
||||
if !ok {
|
||||
return ImportResult{}, models.SiteRequiredError
|
||||
}
|
||||
|
||||
zr, err := zip.OpenReader(zipPath)
|
||||
if err != nil {
|
||||
return ImportResult{}, fmt.Errorf("open zip: %w", err)
|
||||
}
|
||||
defer zr.Close()
|
||||
|
||||
var result ImportResult
|
||||
|
||||
for _, f := range zr.File {
|
||||
if f.FileInfo().IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(f.Name))
|
||||
if ext == ".md" || ext == ".markdown" {
|
||||
if err := s.importNote(ctx, site, f); err != nil {
|
||||
log.Printf("warn: skipping note %s: %v", f.Name, err)
|
||||
continue
|
||||
}
|
||||
result.PostsImported++
|
||||
} else if isAttachment(ext) {
|
||||
if err := s.importAttachment(ctx, site, f); err != nil {
|
||||
log.Printf("warn: skipping attachment %s: %v", f.Name, err)
|
||||
continue
|
||||
}
|
||||
result.UploadsImported++
|
||||
}
|
||||
}
|
||||
|
||||
s.publisher.Queue(site)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Service) importNote(ctx context.Context, site models.Site, f *zip.File) error {
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
data, err := io.ReadAll(rc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
body := stripFrontMatter(string(data))
|
||||
title := strings.TrimSuffix(filepath.Base(f.Name), filepath.Ext(f.Name))
|
||||
publishedAt := f.Modified
|
||||
if publishedAt.IsZero() {
|
||||
publishedAt = time.Now()
|
||||
}
|
||||
|
||||
renderTZ, err := time.LoadLocation(site.Timezone)
|
||||
if err != nil {
|
||||
renderTZ = time.UTC
|
||||
}
|
||||
publishedAt = publishedAt.In(renderTZ)
|
||||
|
||||
post := &models.Post{
|
||||
SiteID: site.ID,
|
||||
GUID: models.NewNanoID(),
|
||||
State: models.StatePublished,
|
||||
Title: title,
|
||||
Body: body,
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
PublishedAt: publishedAt,
|
||||
}
|
||||
post.Slug = post.BestSlug()
|
||||
|
||||
return s.db.SavePost(ctx, post)
|
||||
}
|
||||
|
||||
func (s *Service) importAttachment(ctx context.Context, site models.Site, f *zip.File) error {
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
// Write to a temp file in scratch dir
|
||||
if err := os.MkdirAll(s.scratchDir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tmpFile, err := os.CreateTemp(s.scratchDir, "obsimport-*"+filepath.Ext(f.Name))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tmpPath := tmpFile.Name()
|
||||
|
||||
if _, err := io.Copy(tmpFile, rc); err != nil {
|
||||
tmpFile.Close()
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
tmpFile.Close()
|
||||
|
||||
filename := filepath.Base(f.Name)
|
||||
mimeType := mime.TypeByExtension(filepath.Ext(filename))
|
||||
if mimeType == "" {
|
||||
mimeType = "application/octet-stream"
|
||||
}
|
||||
|
||||
stat, err := os.Stat(tmpPath)
|
||||
if err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
|
||||
newUploadGUID := models.NewNanoID()
|
||||
newTime := time.Now().UTC()
|
||||
newSlug := filepath.Join(
|
||||
fmt.Sprintf("%04d", newTime.Year()),
|
||||
fmt.Sprintf("%02d", newTime.Month()),
|
||||
newUploadGUID+filepath.Ext(filename),
|
||||
)
|
||||
|
||||
newUpload := models.Upload{
|
||||
SiteID: site.ID,
|
||||
GUID: models.NewNanoID(),
|
||||
FileSize: stat.Size(),
|
||||
MIMEType: mimeType,
|
||||
Filename: filename,
|
||||
CreatedAt: newTime,
|
||||
Slug: newSlug,
|
||||
}
|
||||
if err := s.db.SaveUpload(ctx, &newUpload); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.up.AdoptFile(site, newUpload, tmpPath); err != nil {
|
||||
os.Remove(tmpPath)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// stripFrontMatter removes YAML front matter (delimited by ---) from markdown content.
//
// Front matter is recognised only when the first line, ignoring surrounding
// whitespace, is "---" and a later line closes it with another "---". In that
// case the text after the closing delimiter is returned with leading blank
// lines removed; each remaining line is terminated with "\n". When there is no
// front matter, or it is never closed, content is returned unchanged.
func stripFrontMatter(content string) string {
	scanner := bufio.NewScanner(strings.NewReader(content))
	// bufio.Scanner gives up on lines longer than 64 KiB by default, which
	// would silently truncate the body of a note containing a very long line.
	// Allow a single line to be as large as the whole input instead.
	scanner.Buffer(make([]byte, 0, 4096), len(content)+1)

	// Check if the first line is a front matter delimiter
	if !scanner.Scan() || strings.TrimSpace(scanner.Text()) != "---" {
		return content
	}

	// Skip until the closing ---
	for scanner.Scan() {
		if strings.TrimSpace(scanner.Text()) != "---" {
			continue
		}

		// Return everything after the closing delimiter
		var rest strings.Builder
		rest.Grow(len(content))
		for scanner.Scan() {
			rest.WriteString(scanner.Text())
			rest.WriteString("\n")
		}
		if scanner.Err() != nil {
			// Never return a partially scanned body.
			return content
		}
		return strings.TrimLeft(rest.String(), "\n")
	}

	// No closing delimiter found, return original content
	return content
}
|
||||
|
||||
// attachmentExts is the set of lower-case file extensions (including the
// leading dot) that are imported as uploads.
var attachmentExts = map[string]bool{
	// Images.
	".png":  true,
	".jpg":  true,
	".jpeg": true,
	".gif":  true,
	".svg":  true,
	".webp": true,
	".bmp":  true,
	".ico":  true,
	".tiff": true,
	".tif":  true,

	// Audio and video.
	".mp3":  true,
	".mp4":  true,
	".wav":  true,
	".ogg":  true,
	".webm": true,

	// Documents.
	".pdf":  true,
	".doc":  true,
	".docx": true,
	".xls":  true,
	".xlsx": true,
}
|
||||
|
||||
func isAttachment(ext string) bool {
|
||||
return attachmentExts[ext]
|
||||
}
|
||||
51
services/obsimport/service_test.go
Normal file
51
services/obsimport/service_test.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package obsimport
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestStripFrontMatter(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "no front matter",
|
||||
input: "Hello world\nThis is a note",
|
||||
want: "Hello world\nThis is a note",
|
||||
},
|
||||
{
|
||||
name: "with front matter",
|
||||
input: "---\ntitle: Test\ntags: [a, b]\n---\nHello world\nThis is a note\n",
|
||||
want: "Hello world\nThis is a note\n",
|
||||
},
|
||||
{
|
||||
name: "only front matter",
|
||||
input: "---\ntitle: Test\n---\n",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "unclosed front matter",
|
||||
input: "---\ntitle: Test\nno closing delimiter",
|
||||
want: "---\ntitle: Test\nno closing delimiter",
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "front matter with leading newlines stripped",
|
||||
input: "---\nkey: val\n---\n\n\nBody here\n",
|
||||
want: "Body here\n",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := stripFrontMatter(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("stripFrontMatter() = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -9,6 +9,7 @@ import (
|
|||
"lmika.dev/lmika/weiro/services/auth"
|
||||
"lmika.dev/lmika/weiro/services/categories"
|
||||
"lmika.dev/lmika/weiro/services/imgedit"
|
||||
"lmika.dev/lmika/weiro/services/obsimport"
|
||||
"lmika.dev/lmika/weiro/services/pages"
|
||||
"lmika.dev/lmika/weiro/services/posts"
|
||||
"lmika.dev/lmika/weiro/services/publisher"
|
||||
|
|
@ -27,6 +28,7 @@ type Services struct {
|
|||
ImageEdit *imgedit.Service
|
||||
Categories *categories.Service
|
||||
Pages *pages.Service
|
||||
ObsImport *obsimport.Service
|
||||
}
|
||||
|
||||
func New(cfg config.Config) (*Services, error) {
|
||||
|
|
@ -46,6 +48,7 @@ func New(cfg config.Config) (*Services, error) {
|
|||
imageEditService := imgedit.New(uploadService, filepath.Join(cfg.ScratchDir, "imageedit"))
|
||||
categoriesService := categories.New(dbp, publisherQueue)
|
||||
pagesService := pages.New(dbp, publisherQueue)
|
||||
obsImportService := obsimport.New(dbp, ufp, publisherQueue, filepath.Join(cfg.ScratchDir, "obsimport"))
|
||||
|
||||
return &Services{
|
||||
DB: dbp,
|
||||
|
|
@ -58,6 +61,7 @@ func New(cfg config.Config) (*Services, error) {
|
|||
ImageEdit: imageEditService,
|
||||
Categories: categoriesService,
|
||||
Pages: pagesService,
|
||||
ObsImport: obsImportService,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue