"Fossies" - the Fresh Open Source Software Archive

Member "hugo-0.62.2/transform/urlreplacers/absurlreplacer.go" (5 Jan 2020, 4843 Bytes) of package /linux/www/hugo-0.62.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Go source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 // Copyright 2018 The Hugo Authors. All rights reserved.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 // http://www.apache.org/licenses/LICENSE-2.0
    7 //
    8 // Unless required by applicable law or agreed to in writing, software
    9 // distributed under the License is distributed on an "AS IS" BASIS,
   10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11 // See the License for the specific language governing permissions and
   12 // limitations under the License.
   13 
   14 package urlreplacers
   15 
   16 import (
   17     "bytes"
   18     "io"
   19     "unicode"
   20     "unicode/utf8"
   21 
   22     "github.com/gohugoio/hugo/transform"
   23 )
   24 
   25 type absurllexer struct {
   26     // the source to absurlify
   27     content []byte
   28     // the target for the new absurlified content
   29     w io.Writer
   30 
   31     // path may be set to a "." relative path
   32     path []byte
   33 
   34     pos   int // input position
   35     start int // item start position
   36 
   37     quotes [][]byte
   38 }
   39 
   40 type prefix struct {
   41     disabled bool
   42     b        []byte
   43     f        func(l *absurllexer)
   44 
   45     nextPos int
   46 }
   47 
   48 func (p *prefix) find(bs []byte, start int) bool {
   49     if p.disabled {
   50         return false
   51     }
   52 
   53     if p.nextPos == -1 {
   54         idx := bytes.Index(bs[start:], p.b)
   55 
   56         if idx == -1 {
   57             p.disabled = true
   58             // Find the closest match
   59             return false
   60         }
   61 
   62         p.nextPos = start + idx + len(p.b)
   63     }
   64 
   65     return true
   66 }
   67 
   68 func newPrefixState() []*prefix {
   69     return []*prefix{
   70         {b: []byte("src="), f: checkCandidateBase},
   71         {b: []byte("href="), f: checkCandidateBase},
   72         {b: []byte("action="), f: checkCandidateBase},
   73         {b: []byte("srcset="), f: checkCandidateSrcset},
   74     }
   75 }
   76 
   77 func (l *absurllexer) emit() {
   78     l.w.Write(l.content[l.start:l.pos])
   79     l.start = l.pos
   80 }
   81 
   82 var (
   83     relURLPrefix    = []byte("/")
   84     relURLPrefixLen = len(relURLPrefix)
   85 )
   86 
   87 func (l *absurllexer) consumeQuote() []byte {
   88     for _, q := range l.quotes {
   89         if bytes.HasPrefix(l.content[l.pos:], q) {
   90             l.pos += len(q)
   91             l.emit()
   92             return q
   93         }
   94     }
   95     return nil
   96 }
   97 
   98 // handle URLs in src and href.
   99 func checkCandidateBase(l *absurllexer) {
  100     l.consumeQuote()
  101 
  102     if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  103         return
  104     }
  105 
  106     // check for schemaless URLs
  107     posAfter := l.pos + relURLPrefixLen
  108     if posAfter >= len(l.content) {
  109         return
  110     }
  111     r, _ := utf8.DecodeRune(l.content[posAfter:])
  112     if r == '/' {
  113         // schemaless: skip
  114         return
  115     }
  116     if l.pos > l.start {
  117         l.emit()
  118     }
  119     l.pos += relURLPrefixLen
  120     l.w.Write(l.path)
  121     l.start = l.pos
  122 }
  123 
  124 func (l *absurllexer) posAfterURL(q []byte) int {
  125     if len(q) > 0 {
  126         // look for end quote
  127         return bytes.Index(l.content[l.pos:], q)
  128     }
  129 
  130     return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
  131         return r == '>' || unicode.IsSpace(r)
  132     })
  133 
  134 }
  135 
  136 // handle URLs in srcset.
  137 func checkCandidateSrcset(l *absurllexer) {
  138     q := l.consumeQuote()
  139     if q == nil {
  140         // srcset needs to be quoted.
  141         return
  142     }
  143 
  144     // special case, not frequent (me think)
  145     if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
  146         return
  147     }
  148 
  149     // check for schemaless URLs
  150     posAfter := l.pos + relURLPrefixLen
  151     if posAfter >= len(l.content) {
  152         return
  153     }
  154     r, _ := utf8.DecodeRune(l.content[posAfter:])
  155     if r == '/' {
  156         // schemaless: skip
  157         return
  158     }
  159 
  160     posEnd := l.posAfterURL(q)
  161 
  162     // safe guard
  163     if posEnd < 0 || posEnd > 2000 {
  164         return
  165     }
  166 
  167     if l.pos > l.start {
  168         l.emit()
  169     }
  170 
  171     section := l.content[l.pos : l.pos+posEnd+1]
  172 
  173     fields := bytes.Fields(section)
  174     for i, f := range fields {
  175         if f[0] == '/' {
  176             l.w.Write(l.path)
  177             l.w.Write(f[1:])
  178 
  179         } else {
  180             l.w.Write(f)
  181         }
  182 
  183         if i < len(fields)-1 {
  184             l.w.Write([]byte(" "))
  185         }
  186     }
  187 
  188     l.pos += len(section)
  189     l.start = l.pos
  190 
  191 }
  192 
  193 // main loop
  194 func (l *absurllexer) replace() {
  195     contentLength := len(l.content)
  196 
  197     prefixes := newPrefixState()
  198 
  199     for {
  200         if l.pos >= contentLength {
  201             break
  202         }
  203 
  204         var match *prefix
  205 
  206         for _, p := range prefixes {
  207             if !p.find(l.content, l.pos) {
  208                 continue
  209             }
  210 
  211             if match == nil || p.nextPos < match.nextPos {
  212                 match = p
  213             }
  214         }
  215 
  216         if match == nil {
  217             // Done!
  218             l.pos = contentLength
  219             break
  220         } else {
  221             l.pos = match.nextPos
  222             match.nextPos = -1
  223             match.f(l)
  224         }
  225     }
  226     // Done!
  227     if l.pos > l.start {
  228         l.emit()
  229     }
  230 }
  231 
  232 func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
  233 
  234     lexer := &absurllexer{
  235         content: ct.From().Bytes(),
  236         w:       ct.To(),
  237         path:    []byte(path),
  238         quotes:  quotes}
  239 
  240     lexer.replace()
  241 }
  242 
  243 type absURLReplacer struct {
  244     htmlQuotes [][]byte
  245     xmlQuotes  [][]byte
  246 }
  247 
  248 func newAbsURLReplacer() *absURLReplacer {
  249     return &absURLReplacer{
  250         htmlQuotes: [][]byte{[]byte("\""), []byte("'")},
  251         xmlQuotes:  [][]byte{[]byte("&#34;"), []byte("&#39;")}}
  252 }
  253 
  254 func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
  255     doReplace(path, ct, au.htmlQuotes)
  256 }
  257 
  258 func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
  259     doReplace(path, ct, au.xmlQuotes)
  260 }