Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions html2text.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ type options struct {
lbr string
linksInnerText bool
listPrefix string
keepSpaces bool
}

// newOptions returns a freshly allocated options value carrying the
// package defaults: lbr is set to WIN_LBR and keepSpaces is off. Fields
// not assigned here keep their zero values.
func newOptions() *options {
	var defaults options
	defaults.lbr = WIN_LBR
	// Spelled out for readability; false is already the zero value.
	defaults.keepSpaces = false
	return &defaults
}

Expand Down Expand Up @@ -64,6 +66,13 @@ func WithListSupport() Option {
return WithListSupportPrefix(" - ")
}

// WithKeepSpaces returns an Option that switches on the keepSpaces flag.
// The conversion loop checks this flag before collapsing space characters
// into a single space, so enabling it bypasses that collapsing branch.
func WithKeepSpaces() Option {
	enable := func(o *options) {
		o.keepSpaces = true
	}
	return enable
}

func parseHTMLEntity(entName string) (string, bool) {
if r, ok := entity[entName]; ok {
return string(r), true
Expand Down Expand Up @@ -193,15 +202,21 @@ func HTML2TextWithOptions(html string, reqOpts ...Option) string {
}

switch {
// skip new lines and spaces adding a single space if not there yet
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
// skip new lines adding a single space if not there yet
case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029: // new lines
if shouldOutput && badTagStackDepth == 0 && !inEnt {
//outBuf.WriteString(fmt.Sprintf("{DBG r:%c, inEnt:%t, tag:%s}", r, inEnt, html[tagStart:i]))
writeSpace(outBuf)
}
continue

// skip spaces adding a single space if not there yet
case r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
if !opts.keepSpaces && shouldOutput && badTagStackDepth == 0 && !inEnt {
writeSpace(outBuf)
continue
}

case r == ';' && inEnt: // end of html entity
inEnt = false
continue
Expand Down
4 changes: 4 additions & 0 deletions html2text_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,5 +138,9 @@ func TestHTML2Text(t *testing.T) {
So(HTML2Text(`<aa x="1">hello</aa>`), ShouldEqual, "hello")
})

Convey("Keep spaces as they are", func() {
So(HTML2TextWithOptions("should not ignore spaces", WithKeepSpaces()), ShouldEqual, "should not ignore spaces")
})

})
}