gitea source for verification 2026-05-22
This commit is contained in:
200
modules/charset/htmlstream.go
Normal file
200
modules/charset/htmlstream.go
Normal file
@@ -0,0 +1,200 @@
|
||||
// Copyright 2022 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package charset
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// HTMLStreamer represents a SAX-like interface for HTML
|
||||
type HTMLStreamer interface {
|
||||
Error(err error) error
|
||||
Doctype(data string) error
|
||||
Comment(data string) error
|
||||
StartTag(data string, attrs ...html.Attribute) error
|
||||
SelfClosingTag(data string, attrs ...html.Attribute) error
|
||||
EndTag(data string) error
|
||||
Text(data string) error
|
||||
}
|
||||
|
||||
// PassthroughHTMLStreamer is a passthrough streamer
|
||||
type PassthroughHTMLStreamer struct {
|
||||
next HTMLStreamer
|
||||
}
|
||||
|
||||
func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
|
||||
return &PassthroughHTMLStreamer{next: next}
|
||||
}
|
||||
|
||||
// Compile-time check that *PassthroughHTMLStreamer satisfies HTMLStreamer.
var _ HTMLStreamer = (*PassthroughHTMLStreamer)(nil)
|
||||
|
||||
// Error tells the next streamer in line that there is an error
|
||||
func (p *PassthroughHTMLStreamer) Error(err error) error {
|
||||
return p.next.Error(err)
|
||||
}
|
||||
|
||||
// Doctype tells the next streamer what the doctype is
|
||||
func (p *PassthroughHTMLStreamer) Doctype(data string) error {
|
||||
return p.next.Doctype(data)
|
||||
}
|
||||
|
||||
// Comment tells the next streamer there is a comment
|
||||
func (p *PassthroughHTMLStreamer) Comment(data string) error {
|
||||
return p.next.Comment(data)
|
||||
}
|
||||
|
||||
// StartTag tells the next streamer there is a starting tag
|
||||
func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
|
||||
return p.next.StartTag(data, attrs...)
|
||||
}
|
||||
|
||||
// SelfClosingTag tells the next streamer there is a self-closing tag
|
||||
func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
|
||||
return p.next.SelfClosingTag(data, attrs...)
|
||||
}
|
||||
|
||||
// EndTag tells the next streamer there is a end tag
|
||||
func (p *PassthroughHTMLStreamer) EndTag(data string) error {
|
||||
return p.next.EndTag(data)
|
||||
}
|
||||
|
||||
// Text tells the next streamer there is a text
|
||||
func (p *PassthroughHTMLStreamer) Text(data string) error {
|
||||
return p.next.Text(data)
|
||||
}
|
||||
|
||||
// HTMLStreamerWriter acts as a writing sink: it serializes the HTML callbacks
// it receives into the embedded io.Writer.
type HTMLStreamerWriter struct {
	// Writer is the destination for the serialized HTML.
	io.Writer
	// err, once non-nil, is returned by Write and WriteString in place of
	// writing anything further.
	err error
}
|
||||
|
||||
// Write implements io.Writer
|
||||
func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
|
||||
if h.err != nil {
|
||||
return 0, h.err
|
||||
}
|
||||
return h.Writer.Write(data)
|
||||
}
|
||||
|
||||
// Write implements io.StringWriter
|
||||
func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
|
||||
if h.err != nil {
|
||||
return 0, h.err
|
||||
}
|
||||
return h.Writer.Write([]byte(data))
|
||||
}
|
||||
|
||||
// Error tells the next streamer in line that there is an error
|
||||
func (h *HTMLStreamerWriter) Error(err error) error {
|
||||
if h.err == nil {
|
||||
h.err = err
|
||||
}
|
||||
return h.err
|
||||
}
|
||||
|
||||
// Doctype tells the next streamer what the doctype is
|
||||
func (h *HTMLStreamerWriter) Doctype(data string) error {
|
||||
_, h.err = h.WriteString("<!DOCTYPE " + data + ">")
|
||||
return h.err
|
||||
}
|
||||
|
||||
// Comment tells the next streamer there is a comment
|
||||
func (h *HTMLStreamerWriter) Comment(data string) error {
|
||||
_, h.err = h.WriteString("<!--" + data + "-->")
|
||||
return h.err
|
||||
}
|
||||
|
||||
// StartTag tells the next streamer there is a starting tag
|
||||
func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
|
||||
return h.startTag(data, attrs, false)
|
||||
}
|
||||
|
||||
// SelfClosingTag tells the next streamer there is a self-closing tag
|
||||
func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
|
||||
return h.startTag(data, attrs, true)
|
||||
}
|
||||
|
||||
func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
|
||||
if _, h.err = h.WriteString("<" + data); h.err != nil {
|
||||
return h.err
|
||||
}
|
||||
for _, attr := range attrs {
|
||||
if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
|
||||
return h.err
|
||||
}
|
||||
}
|
||||
if selfclosing {
|
||||
if _, h.err = h.WriteString("/>"); h.err != nil {
|
||||
return h.err
|
||||
}
|
||||
} else {
|
||||
if _, h.err = h.WriteString(">"); h.err != nil {
|
||||
return h.err
|
||||
}
|
||||
}
|
||||
return h.err
|
||||
}
|
||||
|
||||
// EndTag tells the next streamer there is a end tag
|
||||
func (h *HTMLStreamerWriter) EndTag(data string) error {
|
||||
_, h.err = h.WriteString("</" + data + ">")
|
||||
return h.err
|
||||
}
|
||||
|
||||
// Text tells the next streamer there is a text
|
||||
func (h *HTMLStreamerWriter) Text(data string) error {
|
||||
_, h.err = h.WriteString(html.EscapeString(data))
|
||||
return h.err
|
||||
}
|
||||
|
||||
// StreamHTML streams an html to a provided streamer
|
||||
func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
|
||||
tokenizer := html.NewTokenizer(source)
|
||||
for {
|
||||
tt := tokenizer.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
if tokenizer.Err() != io.EOF {
|
||||
return tokenizer.Err()
|
||||
}
|
||||
return nil
|
||||
case html.DoctypeToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.Doctype(token.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
case html.CommentToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.Comment(token.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
case html.StartTagToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
|
||||
return err
|
||||
}
|
||||
case html.SelfClosingTagToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
|
||||
return err
|
||||
}
|
||||
case html.EndTagToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.EndTag(token.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
case html.TextToken:
|
||||
token := tokenizer.Token()
|
||||
if err := streamer.Text(token.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unknown type of token: %d", tt)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user