"Fossies" - the Fresh Open Source Software Archive

Member "hugo-0.112.4/transform/urlreplacers/absurlreplacer.go" (28 May 2023, 4892 Bytes) of package /linux/www/hugo-0.112.4.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Go source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 // Copyright 2018 The Hugo Authors. All rights reserved.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 // http://www.apache.org/licenses/LICENSE-2.0
    7 //
    8 // Unless required by applicable law or agreed to in writing, software
    9 // distributed under the License is distributed on an "AS IS" BASIS,
   10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11 // See the License for the specific language governing permissions and
   12 // limitations under the License.
   13 
   14 package urlreplacers
   15 
   16 import (
   17     "bytes"
   18     "io"
   19     "unicode"
   20     "unicode/utf8"
   21 
   22     "github.com/gohugoio/hugo/transform"
   23 )
   24 
// absurllexer holds the state of a single pass over content in which
// root-relative URLs following known attribute prefixes get path
// written in place of their leading "/".
type absurllexer struct {
	// the source to absurlify
	content []byte
	// the target for the new absurlified content
	w io.Writer

	// path may be set to a "." relative path
	path []byte

	pos   int // input position
	start int // item start position; content[start:pos] has not been written to w yet

	// quotes lists the quote sequences that may directly follow a prefix.
	quotes [][]byte
}
   39 
// prefix describes one attribute prefix (such as "href=") to look for,
// together with the handler to run right after it and the cached
// result of the last search.
type prefix struct {
	// disabled is set by find once b no longer occurs in the remaining input.
	disabled bool
	// b is the literal byte sequence to search for.
	b []byte
	// f is the handler invoked with the lexer positioned just after b.
	f func(l *absurllexer)

	// nextPos is the offset just past the cached occurrence of b;
	// find treats -1 as "no cached occurrence, search again".
	nextPos int
}
   47 
   48 func (p *prefix) find(bs []byte, start int) bool {
   49     if p.disabled {
   50         return false
   51     }
   52 
   53     if p.nextPos == -1 {
   54         idx := bytes.Index(bs[start:], p.b)
   55 
   56         if idx == -1 {
   57             p.disabled = true
   58             // Find the closest match
   59             return false
   60         }
   61 
   62         p.nextPos = start + idx + len(p.b)
   63     }
   64 
   65     return true
   66 }
   67 
   68 func newPrefixState() []*prefix {
   69     return []*prefix{
   70         {b: []byte("src="), f: checkCandidateBase},
   71         {b: []byte("href="), f: checkCandidateBase},
   72         {b: []byte("url="), f: checkCandidateBase},
   73         {b: []byte("action="), f: checkCandidateBase},
   74         {b: []byte("srcset="), f: checkCandidateSrcset},
   75     }
   76 }
   77 
   78 func (l *absurllexer) emit() {
   79     l.w.Write(l.content[l.start:l.pos])
   80     l.start = l.pos
   81 }
   82 
var (
	// relURLPrefix is the leading byte sequence that marks a URL as a
	// candidate for rewriting.
	relURLPrefix = []byte("/")
	// relURLPrefixLen is the length of relURLPrefix in bytes.
	relURLPrefixLen = len(relURLPrefix)
)
   87 
   88 func (l *absurllexer) consumeQuote() []byte {
   89     for _, q := range l.quotes {
   90         if bytes.HasPrefix(l.content[l.pos:], q) {
   91             l.pos += len(q)
   92             l.emit()
   93             return q
   94         }
   95     }
   96     return nil
   97 }
   98 
   99 // handle URLs in src and href.
  100 func checkCandidateBase(l *absurllexer) {
  101     l.consumeQuote()
  102 
  103     if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  104         return
  105     }
  106 
  107     // check for schemaless URLs
  108     posAfter := l.pos + relURLPrefixLen
  109     if posAfter >= len(l.content) {
  110         return
  111     }
  112     r, _ := utf8.DecodeRune(l.content[posAfter:])
  113     if r == '/' {
  114         // schemaless: skip
  115         return
  116     }
  117     if l.pos > l.start {
  118         l.emit()
  119     }
  120     l.pos += relURLPrefixLen
  121     l.w.Write(l.path)
  122     l.start = l.pos
  123 }
  124 
  125 func (l *absurllexer) posAfterURL(q []byte) int {
  126     if len(q) > 0 {
  127         // look for end quote
  128         return bytes.Index(l.content[l.pos:], q)
  129     }
  130 
  131     return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
  132         return r == '>' || unicode.IsSpace(r)
  133     })
  134 }
  135 
  136 // handle URLs in srcset.
  137 func checkCandidateSrcset(l *absurllexer) {
  138     q := l.consumeQuote()
  139     if q == nil {
  140         // srcset needs to be quoted.
  141         return
  142     }
  143 
  144     // special case, not frequent (me think)
  145     if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  146         return
  147     }
  148 
  149     // check for schemaless URLs
  150     posAfter := l.pos + relURLPrefixLen
  151     if posAfter >= len(l.content) {
  152         return
  153     }
  154     r, _ := utf8.DecodeRune(l.content[posAfter:])
  155     if r == '/' {
  156         // schemaless: skip
  157         return
  158     }
  159 
  160     posEnd := l.posAfterURL(q)
  161 
  162     // safe guard
  163     if posEnd < 0 || posEnd > 2000 {
  164         return
  165     }
  166 
  167     if l.pos > l.start {
  168         l.emit()
  169     }
  170 
  171     section := l.content[l.pos : l.pos+posEnd+1]
  172 
  173     fields := bytes.Fields(section)
  174     for i, f := range fields {
  175         if f[0] == '/' {
  176             l.w.Write(l.path)
  177             l.w.Write(f[1:])
  178 
  179         } else {
  180             l.w.Write(f)
  181         }
  182 
  183         if i < len(fields)-1 {
  184             l.w.Write([]byte(" "))
  185         }
  186     }
  187 
  188     l.pos += len(section)
  189     l.start = l.pos
  190 }
  191 
  192 // main loop
  193 func (l *absurllexer) replace() {
  194     contentLength := len(l.content)
  195 
  196     prefixes := newPrefixState()
  197 
  198     for {
  199         if l.pos >= contentLength {
  200             break
  201         }
  202 
  203         var match *prefix
  204 
  205         for _, p := range prefixes {
  206             if !p.find(l.content, l.pos) {
  207                 continue
  208             }
  209 
  210             if match == nil || p.nextPos < match.nextPos {
  211                 match = p
  212             }
  213         }
  214 
  215         if match == nil {
  216             // Done!
  217             l.pos = contentLength
  218             break
  219         } else {
  220             l.pos = match.nextPos
  221             match.nextPos = -1
  222             match.f(l)
  223         }
  224     }
  225     // Done!
  226     if l.pos > l.start {
  227         l.emit()
  228     }
  229 }
  230 
  231 func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
  232     lexer := &absurllexer{
  233         content: ct.From().Bytes(),
  234         w:       ct.To(),
  235         path:    []byte(path),
  236         quotes:  quotes,
  237     }
  238 
  239     lexer.replace()
  240 }
  241 
// absURLReplacer carries the quote sequences used when rewriting URLs
// in HTML and in XML content.
type absURLReplacer struct {
	htmlQuotes [][]byte // quote sequences passed to doReplace by replaceInHTML
	xmlQuotes  [][]byte // quote sequences passed to doReplace by replaceInXML
}
  246 
  247 func newAbsURLReplacer() *absURLReplacer {
  248     return &absURLReplacer{
  249         htmlQuotes: [][]byte{[]byte("\""), []byte("'")},
  250         xmlQuotes:  [][]byte{[]byte("&#34;"), []byte("&#39;")},
  251     }
  252 }
  253 
// replaceInHTML runs doReplace on ct with path, using the HTML quote
// sequences.
func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
	doReplace(path, ct, au.htmlQuotes)
}
  257 
// replaceInXML runs doReplace on ct with path, using the XML quote
// sequences.
func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
	doReplace(path, ct, au.xmlQuotes)
}