html.go (gitea-1.13.1) | : | html.go (gitea-1.13.2) | ||
---|---|---|---|---|
skipping to change at line 46 | skipping to change at line 46 | |||
var ( | var ( | |||
// NOTE: All below regex matching do not perform any extra validation. | // NOTE: All below regex matching do not perform any extra validation. | |||
// Thus a link is produced even if the linked entity does not exist. | // Thus a link is produced even if the linked entity does not exist. | |||
// While fast, this is also incorrect and leads to false positives. | // While fast, this is also incorrect and leads to false positives. | |||
// TODO: fix invalid linking issue | // TODO: fix invalid linking issue | |||
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae | // sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae | |||
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length | // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length | |||
// so that abbreviated hash links can be used as well. This matches git and github usability. | // so that abbreviated hash links can be used as well. This matches git and github usability. | |||
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`) | sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`) | |||
// shortLinkPattern matches short but difficult to parse [[name|link|arg= test]] syntax | // shortLinkPattern matches short but difficult to parse [[name|link|arg= test]] syntax | |||
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) | |||
// anySHA1Pattern allows to split url containing SHA into parts | // anySHA1Pattern allows to split url containing SHA into parts | |||
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`) | anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`) | |||
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) | |||
// While this email regex is definitely not perfect and I'm sure you can come up | // While this email regex is definitely not perfect and I'm sure you can come up | |||
skipping to change at line 301 | skipping to change at line 301 | |||
// in various places it isn't already run through the normal markdown processor | // in various places it isn't already run through the normal markdown processor | |||
func RenderEmoji( | func RenderEmoji( | |||
rawHTML []byte, | rawHTML []byte, | |||
) ([]byte, error) { | ) ([]byte, error) { | |||
ctx := &postProcessCtx{ | ctx := &postProcessCtx{ | |||
procs: emojiProcessors, | procs: emojiProcessors, | |||
} | } | |||
return ctx.postProcess(rawHTML) | return ctx.postProcess(rawHTML) | |||
} | } | |||
var byteBodyTag = []byte("<body>") | ||||
var byteBodyTagClosing = []byte("</body>") | ||||
func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) { | func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) { | |||
if ctx.procs == nil { | if ctx.procs == nil { | |||
ctx.procs = defaultProcessors | ctx.procs = defaultProcessors | |||
} | } | |||
// give a generous extra 50 bytes | // give a generous extra 50 bytes | |||
res := make([]byte, 0, len(rawHTML)+50) | res := make([]byte, 0, len(rawHTML)+50) | |||
res = append(res, byteBodyTag...) | res = append(res, "<html><body>"...) | |||
res = append(res, rawHTML...) | res = append(res, rawHTML...) | |||
res = append(res, byteBodyTagClosing...) | res = append(res, "</body></html>"...) | |||
// parse the HTML | // parse the HTML | |||
nodes, err := html.ParseFragment(bytes.NewReader(res), nil) | nodes, err := html.ParseFragment(bytes.NewReader(res), nil) | |||
if err != nil { | if err != nil { | |||
return nil, &postProcessError{"invalid HTML", err} | return nil, &postProcessError{"invalid HTML", err} | |||
} | } | |||
for _, node := range nodes { | for _, node := range nodes { | |||
ctx.visitNode(node, true) | ctx.visitNode(node, true) | |||
} | } | |||
newNodes := make([]*html.Node, 0, len(nodes)) | ||||
for _, node := range nodes { | ||||
if node.Data == "html" { | ||||
node = node.FirstChild | ||||
for node != nil && node.Data != "body" { | ||||
node = node.NextSibling | ||||
} | ||||
} | ||||
if node == nil { | ||||
continue | ||||
} | ||||
if node.Data == "body" { | ||||
child := node.FirstChild | ||||
for child != nil { | ||||
newNodes = append(newNodes, child) | ||||
child = child.NextSibling | ||||
} | ||||
} else { | ||||
newNodes = append(newNodes, node) | ||||
} | ||||
} | ||||
nodes = newNodes | ||||
// Create buffer in which the data will be placed again. We know that the | // Create buffer in which the data will be placed again. We know that the | |||
// length will be at least that of res; to spare a few alloc+copy, we | // length will be at least that of res; to spare a few alloc+copy, we | |||
// reuse res, resetting its length to 0. | // reuse res, resetting its length to 0. | |||
buf := bytes.NewBuffer(res[:0]) | buf := bytes.NewBuffer(res[:0]) | |||
// Render everything to buf. | // Render everything to buf. | |||
for _, node := range nodes { | for _, node := range nodes { | |||
err = html.Render(buf, node) | err = html.Render(buf, node) | |||
if err != nil { | if err != nil { | |||
return nil, &postProcessError{"error rendering processed HTML", err} | return nil, &postProcessError{"error rendering processed HTML", err} | |||
} | } | |||
} | } | |||
// remove initial parts - because Render creates a whole HTML page. | ||||
res = buf.Bytes() | ||||
res = res[bytes.Index(res, byteBodyTag)+len(byteBodyTag) : bytes.LastIndex(res, byteBodyTagClosing)] | ||||
// Everything done successfully, return parsed data. | // Everything done successfully, return parsed data. | |||
return res, nil | return buf.Bytes(), nil | |||
} | } | |||
func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) { | func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) { | |||
// Add user-content- to IDs if they don't already have them | // Add user-content- to IDs if they don't already have them | |||
for idx, attr := range node.Attr { | for idx, attr := range node.Attr { | |||
if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) { | if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) { | |||
node.Attr[idx].Val = "user-content-" + attr.Val | node.Attr[idx].Val = "user-content-" + attr.Val | |||
} | } | |||
if attr.Key == "class" && attr.Val == "emoji" { | if attr.Key == "class" && attr.Val == "emoji" { | |||
End of changes. 7 change blocks. | ||||
12 lines changed or deleted | 29 lines changed or added |