Merge pull request #1513 from crazy-max/dependabot/go_modules/github.com/go-playground/validator/v10-10.28.0

chore(deps): bump github.com/go-playground/validator/v10 from 10.27.0 to 10.28.0
This commit is contained in:
CrazyMax
2025-11-09 17:12:18 +01:00
committed by GitHub
31 changed files with 1785 additions and 1217 deletions

4
go.mod
View File

@@ -19,7 +19,7 @@ require (
github.com/dromara/carbon/v2 v2.6.14
github.com/eclipse/paho.mqtt.golang v1.5.0
github.com/go-gomail/gomail v0.0.0-20160411212932-81ebce5c23df
github.com/go-playground/validator/v10 v10.27.0
github.com/go-playground/validator/v10 v10.28.0
github.com/hashicorp/nomad/api v0.0.0-20250812204832-62b195aaa535 // v1.10.4
github.com/jedib0t/go-pretty/v6 v6.6.8
github.com/matcornic/hermes/v2 v2.1.0
@@ -76,7 +76,7 @@ require (
github.com/felixge/fgprof v0.9.5 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.8 // indirect
github.com/gabriel-vasile/mimetype v1.4.10 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect

8
go.sum
View File

@@ -111,8 +111,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0=
github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
github.com/go-gomail/gomail v0.0.0-20160411212932-81ebce5c23df h1:Bao6dhmbTA1KFVxmJ6nBoMuOJit2yjEgLJpIMYpop0E=
github.com/go-gomail/gomail v0.0.0-20160411212932-81ebce5c23df/go.mod h1:GJr+FCSXshIwgHBtLglIg9M2l2kQSi6QjVAngtzI08Y=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -132,8 +132,8 @@ github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4=
github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo=
github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0ktULL6FgHdG688=
github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=

View File

@@ -0,0 +1,5 @@
version: "2"
linters:
exclusions:
presets:
- std-error-handling

View File

@@ -1,76 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at vasile.gabriel@email.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

View File

@@ -1,12 +0,0 @@
## Contribute
Contributions to **mimetype** are welcome. If you find an issue and you consider
contributing, you can use the [Github issues tracker](https://github.com/gabriel-vasile/mimetype/issues)
in order to report it, or better yet, open a pull request.
Code contributions must respect these rules:
- code must be test covered
- code must be formatted using gofmt tool
- exported names must be documented
**Important**: By submitting a pull request, you agree to allow the project
owner to license your work under the same license as that used by the project.

View File

@@ -27,6 +27,7 @@
- possibility to [extend](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-Extend) with other file formats
- common file formats are prioritized
- [text vs. binary files differentiation](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#example-package-TextVsBinary)
- no external dependencies
- safe for concurrent usage
## Install
@@ -45,8 +46,7 @@ fmt.Println(mtype.String(), mtype.Extension())
```
See the [runnable Go Playground examples](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#pkg-overview).
## Usage
Only use libraries like **mimetype** as a last resort. Content type detection
Caution: only use libraries like **mimetype** as a last resort. Content type detection
using magic numbers is slow, inaccurate, and non-standard. Most of the times
protocols have methods for specifying such metadata; e.g., `Content-Type` header
in HTTP and SMTP.
@@ -67,6 +67,18 @@ mimetype.DetectFile("file.doc")
If increasing the limit does not help, please
[open an issue](https://github.com/gabriel-vasile/mimetype/issues/new?assignees=&labels=&template=mismatched-mime-type-detected.md&title=).
## Tests
In addition to unit tests,
[mimetype_tests](https://github.com/gabriel-vasile/mimetype_tests) compares the
library with the [Unix file utility](https://en.wikipedia.org/wiki/File_(command))
for around 50 000 sample files. Check the latest comparison results
[here](https://github.com/gabriel-vasile/mimetype_tests/actions).
## Benchmarks
Benchmarks for each file format are performed when a PR is open. The results can
be seen on the [workflows page](https://github.com/gabriel-vasile/mimetype/actions/workflows/benchmark.yml).
Performance improvements are welcome but correctness is prioritized.
## Structure
**mimetype** uses a hierarchical structure to keep the MIME type detection logic.
This reduces the number of calls needed for detecting the file type. The reason
@@ -84,19 +96,8 @@ or from a [file](https://pkg.go.dev/github.com/gabriel-vasile/mimetype#DetectFil
<img alt="how project is structured" src="https://raw.githubusercontent.com/gabriel-vasile/mimetype/master/testdata/gif.gif" width="88%">
</div>
## Performance
Thanks to the hierarchical structure, searching for common formats first,
and limiting itself to file headers, **mimetype** matches the performance of
stdlib `http.DetectContentType` while outperforming the alternative package.
```bash
mimetype http.DetectContentType filetype
BenchmarkMatchTar-24 250 ns/op 400 ns/op 3778 ns/op
BenchmarkMatchZip-24 524 ns/op 351 ns/op 4884 ns/op
BenchmarkMatchJpeg-24 103 ns/op 228 ns/op 839 ns/op
BenchmarkMatchGif-24 139 ns/op 202 ns/op 751 ns/op
BenchmarkMatchPng-24 165 ns/op 221 ns/op 1176 ns/op
```
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md).
Contributions are unexpected but welcome. When submitting a PR for detection of
a new file format, please make sure to add a record to the list of testcases
from [mimetype_test.go](mimetype_test.go). For complex files a record can be added
in the [testdata](testdata) directory.

View File

@@ -2,11 +2,10 @@ package charset
import (
"bytes"
"encoding/xml"
"strings"
"unicode/utf8"
"golang.org/x/net/html"
"github.com/gabriel-vasile/mimetype/internal/markup"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
const (
@@ -141,20 +140,31 @@ func FromXML(content []byte) string {
}
return FromPlain(content)
}
func fromXML(content []byte) string {
content = trimLWS(content)
dec := xml.NewDecoder(bytes.NewReader(content))
rawT, err := dec.RawToken()
if err != nil {
return ""
func fromXML(s scan.Bytes) string {
xml := []byte("<?XML")
lxml := len(xml)
for {
if len(s) == 0 {
return ""
}
for scan.ByteIsWS(s.Peek()) {
s.Advance(1)
}
if len(s) <= lxml {
return ""
}
if !s.Match(xml, scan.IgnoreCase) {
s = s[1:] // safe to slice instead of s.Advance(1) because bounds are checked
continue
}
aName, aVal, hasMore := "", "", true
for hasMore {
aName, aVal, hasMore = markup.GetAnAttribute(&s)
if aName == "encoding" && aVal != "" {
return aVal
}
}
}
t, ok := rawT.(xml.ProcInst)
if !ok {
return ""
}
return strings.ToLower(xmlEncoding(string(t.Inst)))
}
// FromHTML returns the charset of an HTML document. It first looks if a BOM is
@@ -171,139 +181,103 @@ func FromHTML(content []byte) string {
return FromPlain(content)
}
func fromHTML(content []byte) string {
z := html.NewTokenizer(bytes.NewReader(content))
func fromHTML(s scan.Bytes) string {
const (
dontKnow = iota
doNeedPragma
doNotNeedPragma
)
meta := []byte("<META")
body := []byte("<BODY")
lmeta := len(meta)
for {
switch z.Next() {
case html.ErrorToken:
return ""
case html.StartTagToken, html.SelfClosingTagToken:
tagName, hasAttr := z.TagName()
if !bytes.Equal(tagName, []byte("meta")) {
continue
}
attrList := make(map[string]bool)
gotPragma := false
const (
dontKnow = iota
doNeedPragma
doNotNeedPragma
)
needPragma := dontKnow
name := ""
for hasAttr {
var key, val []byte
key, val, hasAttr = z.TagAttr()
ks := string(key)
if attrList[ks] {
continue
}
attrList[ks] = true
for i, c := range val {
if 'A' <= c && c <= 'Z' {
val[i] = c + 0x20
}
}
switch ks {
case "http-equiv":
if bytes.Equal(val, []byte("content-type")) {
gotPragma = true
}
case "content":
name = fromMetaElement(string(val))
if name != "" {
needPragma = doNeedPragma
}
case "charset":
name = string(val)
needPragma = doNotNeedPragma
}
}
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
continue
}
if strings.HasPrefix(name, "utf-16") {
name = "utf-8"
}
return name
}
}
}
func fromMetaElement(s string) string {
for s != "" {
csLoc := strings.Index(s, "charset")
if csLoc == -1 {
return ""
}
s = s[csLoc+len("charset"):]
s = strings.TrimLeft(s, " \t\n\f\r")
if !strings.HasPrefix(s, "=") {
if markup.SkipAComment(&s) {
continue
}
s = s[1:]
s = strings.TrimLeft(s, " \t\n\f\r")
if s == "" {
if len(s) <= lmeta {
return ""
}
if q := s[0]; q == '"' || q == '\'' {
s = s[1:]
closeQuote := strings.IndexRune(s, rune(q))
if closeQuote == -1 {
return ""
// Abort when <body is reached.
if s.Match(body, scan.IgnoreCase) {
return ""
}
if !s.Match(meta, scan.IgnoreCase) {
s = s[1:] // safe to slice instead of s.Advance(1) because bounds are checked
continue
}
s = s[lmeta:]
c := s.Pop()
if c == 0 || (!scan.ByteIsWS(c) && c != '/') {
return ""
}
attrList := make(map[string]bool)
gotPragma := false
needPragma := dontKnow
charset := ""
aName, aVal, hasMore := "", "", true
for hasMore {
aName, aVal, hasMore = markup.GetAnAttribute(&s)
if attrList[aName] {
continue
}
// processing step
if len(aName) == 0 && len(aVal) == 0 {
if needPragma == dontKnow {
continue
}
if needPragma == doNeedPragma && !gotPragma {
continue
}
}
attrList[aName] = true
if aName == "http-equiv" && scan.Bytes(aVal).Match([]byte("CONTENT-TYPE"), scan.IgnoreCase) {
gotPragma = true
} else if aName == "content" {
charset = string(extractCharsetFromMeta(scan.Bytes(aVal)))
if len(charset) != 0 {
needPragma = doNeedPragma
}
} else if aName == "charset" {
charset = aVal
needPragma = doNotNeedPragma
}
return s[:closeQuote]
}
end := strings.IndexAny(s, "; \t\n\f\r")
if end == -1 {
end = len(s)
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
continue
}
return s[:end]
return charset
}
return ""
}
func xmlEncoding(s string) string {
param := "encoding="
idx := strings.Index(s, param)
if idx == -1 {
return ""
}
v := s[idx+len(param):]
if v == "" {
return ""
}
if v[0] != '\'' && v[0] != '"' {
return ""
}
idx = strings.IndexRune(v[1:], rune(v[0]))
if idx == -1 {
return ""
}
return v[1 : idx+1]
}
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#algorithm-for-extracting-a-character-encoding-from-a-meta-element
func extractCharsetFromMeta(s scan.Bytes) []byte {
for {
i := bytes.Index(s, []byte("charset"))
if i == -1 {
return nil
}
s.Advance(i + len("charset"))
for scan.ByteIsWS(s.Peek()) {
s.Advance(1)
}
if s.Pop() != '=' {
continue
}
for scan.ByteIsWS(s.Peek()) {
s.Advance(1)
}
quote := s.Peek()
if quote == 0 {
return nil
}
if quote == '"' || quote == '\'' {
s.Advance(1)
return bytes.TrimSpace(s.PopUntil(quote))
}
// trimLWS trims whitespace from beginning of the input.
// TODO: find a way to call trimLWS once per detection instead of once in each
// detector which needs the trimmed input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
return bytes.TrimSpace(s.PopUntil(';', '\t', '\n', '\x0c', '\r', ' '))
}
return in[firstNonWS:]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}

View File

@@ -0,0 +1,125 @@
package csv
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
// Parser is a CSV reader that only counts fields.
// It avoids allocating/copying memory and to verify behaviour, it is tested
// and fuzzed against encoding/csv parser.
type Parser struct {
	comma   byte       // field delimiter byte (e.g. ',' or '\t')
	comment byte       // lines starting with this byte are skipped entirely
	s       scan.Bytes // remaining unread input
}

// NewParser returns a Parser that counts CSV fields in s, using comma as the
// field delimiter and comment as the line-comment marker.
func NewParser(comma, comment byte, s scan.Bytes) *Parser {
	return &Parser{
		comma:   comma,
		comment: comment,
		s:       s,
	}
}
// readLine consumes and returns the next line from the input, stripping a
// trailing \r. cutShort reports the \r\n case so CountFields can emulate
// encoding/csv's normalization without mutating the input bytes.
// NOTE(review): assumes scan.Bytes.ReadSlice('\n') returns the line including
// its '\n' delimiter when found — confirm against internal/scan.
func (r *Parser) readLine() (line []byte, cutShort bool) {
	line = r.s.ReadSlice('\n')
	n := len(line)
	if n > 0 && line[n-1] == '\r' {
		return line[:n-1], false // drop \r at end of line
	}
	// This line is problematic. The logic from CountFields comes from
	// encoding/csv.Reader which relies on mutating the input bytes.
	// https://github.com/golang/go/blob/b3251514531123d7fd007682389bce7428d159a0/src/encoding/csv/reader.go#L275-L279
	// To avoid mutating the input, we return cutShort. #680
	if n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
		return line[:n-2], true
	}
	return line, false
}
// CountFields reads one CSV line and counts how many records that line contained.
// hasMore reports whether there are more lines in the input.
// collectIndexes makes CountFields return a list of indexes where CSV fields
// start in the line. These indexes are used to test the correctness against the
// encoding/csv parser.
func (r *Parser) CountFields(collectIndexes bool) (fields int, fieldPos []int, hasMore bool) {
	finished := false
	var line scan.Bytes
	cutShort := false
	// Skip blank and comment lines until a data line (or end of input) is found.
	for {
		line, cutShort = r.readLine()
		if finished {
			return 0, nil, false
		}
		finished = len(r.s) == 0 && len(line) == 0
		if len(line) == lengthNL(line) {
			line = nil
			continue // Skip empty lines.
		}
		if len(line) > 0 && line[0] == r.comment {
			line = nil
			continue // Skip comment lines.
		}
		break
	}
	indexes := []int{}
	originalLine := line
parseField:
	for {
		if len(line) == 0 || line[0] != '"' { // non-quoted string field
			fields++
			if collectIndexes {
				indexes = append(indexes, len(originalLine)-len(line))
			}
			i := bytes.IndexByte(line, r.comma)
			if i >= 0 {
				line.Advance(i + 1) // 1 to get over ending comma
				continue parseField
			}
			break parseField
		} else { // Quoted string field.
			if collectIndexes {
				indexes = append(indexes, len(originalLine)-len(line))
			}
			line.Advance(1) // get over starting quote
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					line.Advance(i + 1) // 1 for ending quote
					switch rn := line.Peek(); {
					case rn == '"': // "" is an escaped quote inside the field
						line.Advance(1)
					case rn == r.comma: // closing quote followed by delimiter
						line.Advance(1)
						fields++
						continue parseField
					case lengthNL(line) == len(line): // closing quote at end of line
						fields++
						break parseField
					}
				} else if len(line) > 0 || cutShort {
					// Quoted field spans multiple lines; keep reading.
					line, cutShort = r.readLine()
					originalLine = line
				} else {
					// End of input inside a quoted field.
					fields++
					break parseField
				}
			}
		}
	}
	return fields, indexes, fields != 0
}
// lengthNL reports the number of bytes for the trailing \n.
func lengthNL(b []byte) int {
	n := len(b)
	if n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}

View File

@@ -1,567 +0,0 @@
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Package json provides a JSON value parser state machine.
// This package is almost entirely copied from the Go stdlib.
// Changes made to it permit users of the package to tell
// if some slice of bytes is a valid beginning of a json string.
package json
import (
"fmt"
"sync"
)
type (
	// scanStatus is the result type of a scanner step function.
	scanStatus int
)

const (
	// Parse-stack states stored in scanner.parseState.
	parseObjectKey   = iota // parsing object key (before colon)
	parseObjectValue        // parsing object value (after colon)
	parseArrayValue         // parsing array value

	// Step-function results.
	// NOTE(review): as rendered, these share one const block with the parse*
	// states, so iota continues here (scanContinue == 3) instead of restarting
	// at 0 — confirm against the upstream source, which declares them in a
	// separate const block.
	scanContinue     scanStatus = iota // uninteresting byte
	scanBeginLiteral                   // end implied by next result != scanContinue
	scanBeginObject                    // begin object
	scanObjectKey                      // just finished object key (string)
	scanObjectValue                    // just finished non-last object value
	scanEndObject                      // end object (implies scanObjectValue if possible)
	scanBeginArray                     // begin array
	scanArrayValue                     // just finished array value
	scanEndArray                       // end array (implies scanArrayValue if possible)
	scanSkipSpace                      // space byte; can skip; known to be last "continue" result
	scanEnd                            // top-level value ended *before* this byte; known to be first "stop" result
	scanError                          // hit an error, scanner.err.

	// This limits the max nesting depth to prevent stack overflow.
	// This is permitted by https://tools.ietf.org/html/rfc7159#section-9
	maxNestingDepth = 10000
)

type (
	// scanner is the JSON scanning state machine. Each byte of input is fed
	// to the current step function, which returns a scanStatus and installs
	// the next step function.
	scanner struct {
		step       func(*scanner, byte) scanStatus // state-transition function for the next byte
		parseState []int                           // stack of nested parse states (object/array)
		endTop     bool                            // whether the top-level value has ended
		err        error                           // first error encountered, if any
		index      int                             // number of bytes consumed so far
	}
)

// scannerPool reuses scanner instances across Scan calls to avoid allocations.
var scannerPool = sync.Pool{
	New: func() any {
		return &scanner{}
	},
}
// newScanner returns a reset scanner taken from the pool, ready for new input.
func newScanner() *scanner {
	s := scannerPool.Get().(*scanner)
	s.reset()
	return s
}

// freeScanner returns s to the pool for reuse.
func freeScanner(s *scanner) {
	// Avoid hanging on to too much memory in extreme cases.
	if len(s.parseState) > 1024 {
		s.parseState = nil
	}
	scannerPool.Put(s)
}

// Scan returns the number of bytes scanned and if there was any error
// in trying to reach the end of data.
func Scan(data []byte) (int, error) {
	s := newScanner()
	defer freeScanner(s)
	_ = checkValid(data, s)
	return s.index, s.err
}
// checkValid verifies that data is valid JSON-encoded data.
// scan is passed in for use by checkValid to avoid an allocation.
// It stops at the first byte that produces a scan error.
func checkValid(data []byte, scan *scanner) error {
	for _, c := range data {
		scan.index++
		if scan.step(scan, c) == scanError {
			return scan.err
		}
	}
	if scan.eof() == scanError {
		return scan.err
	}
	return nil
}
// isSpace reports whether c is a JSON whitespace byte:
// space, tab, carriage return, or newline.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// reset prepares the scanner for a new input, reusing the parseState
// backing array from any previous run.
func (s *scanner) reset() {
	s.step = stateBeginValue
	s.parseState = s.parseState[0:0]
	s.err = nil
	s.endTop = false
	s.index = 0
}
// eof tells the scanner that the end of input has been reached.
// It returns a scan status just as s.step does.
func (s *scanner) eof() scanStatus {
	if s.err != nil {
		return scanError
	}
	if s.endTop {
		return scanEnd
	}
	// Feed a virtual trailing space to flush a pending top-level literal
	// (e.g. a bare number) that only terminates on a following byte.
	s.step(s, ' ')
	if s.endTop {
		return scanEnd
	}
	if s.err == nil {
		s.err = fmt.Errorf("unexpected end of JSON input")
	}
	return scanError
}
// pushParseState pushes a new parse state p onto the parse stack.
// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.
func (s *scanner) pushParseState(c byte, newParseState int, successState scanStatus) scanStatus {
	s.parseState = append(s.parseState, newParseState)
	if len(s.parseState) <= maxNestingDepth {
		return successState
	}
	return s.error(c, "exceeded max depth")
}

// popParseState pops a parse state (already obtained) off the stack
// and updates s.step accordingly.
func (s *scanner) popParseState() {
	n := len(s.parseState) - 1
	s.parseState = s.parseState[0:n]
	if n == 0 {
		// Stack empty: the top-level value is complete.
		s.step = stateEndTop
		s.endTop = true
	} else {
		s.step = stateEndValue
	}
}
// stateBeginValueOrEmpty is the state after reading `[`.
// It accepts an immediate `]` (empty array) in addition to any value.
func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}
	if c == ']' {
		return stateEndValue(s, c)
	}
	return stateBeginValue(s, c)
}
// stateBeginValue is the state at the beginning of the input.
// It dispatches on the first byte of a JSON value.
func stateBeginValue(s *scanner, c byte) scanStatus {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}
	switch c {
	case '{':
		s.step = stateBeginStringOrEmpty
		return s.pushParseState(c, parseObjectKey, scanBeginObject)
	case '[':
		s.step = stateBeginValueOrEmpty
		return s.pushParseState(c, parseArrayValue, scanBeginArray)
	case '"':
		s.step = stateInString
		return scanBeginLiteral
	case '-':
		s.step = stateNeg
		return scanBeginLiteral
	case '0': // beginning of 0.123
		s.step = state0
		return scanBeginLiteral
	case 't': // beginning of true
		s.step = stateT
		return scanBeginLiteral
	case 'f': // beginning of false
		s.step = stateF
		return scanBeginLiteral
	case 'n': // beginning of null
		s.step = stateN
		return scanBeginLiteral
	}
	if '1' <= c && c <= '9' { // beginning of 1234.5
		s.step = state1
		return scanBeginLiteral
	}
	return s.error(c, "looking for beginning of value")
}
// stateBeginStringOrEmpty is the state after reading `{`.
// It accepts an immediate `}` (empty object) in addition to a key string.
func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}
	if c == '}' {
		n := len(s.parseState)
		s.parseState[n-1] = parseObjectValue
		return stateEndValue(s, c)
	}
	return stateBeginString(s, c)
}

// stateBeginString is the state after reading `{"key": value,`.
// Only a quoted object key (or whitespace) is valid here.
func stateBeginString(s *scanner, c byte) scanStatus {
	if c <= ' ' && isSpace(c) {
		return scanSkipSpace
	}
	if c == '"' {
		s.step = stateInString
		return scanBeginLiteral
	}
	return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value,
// such as after reading `{}` or `true` or `["x"`.
// What comes next depends on the enclosing container, if any.
func stateEndValue(s *scanner, c byte) scanStatus {
	n := len(s.parseState)
	if n == 0 {
		// Completed top-level before the current byte.
		s.step = stateEndTop
		s.endTop = true
		return stateEndTop(s, c)
	}
	if c <= ' ' && isSpace(c) {
		s.step = stateEndValue
		return scanSkipSpace
	}
	ps := s.parseState[n-1]
	switch ps {
	case parseObjectKey:
		if c == ':' {
			s.parseState[n-1] = parseObjectValue
			s.step = stateBeginValue
			return scanObjectKey
		}
		return s.error(c, "after object key")
	case parseObjectValue:
		if c == ',' {
			s.parseState[n-1] = parseObjectKey
			s.step = stateBeginString
			return scanObjectValue
		}
		if c == '}' {
			s.popParseState()
			return scanEndObject
		}
		return s.error(c, "after object key:value pair")
	case parseArrayValue:
		if c == ',' {
			s.step = stateBeginValue
			return scanArrayValue
		}
		if c == ']' {
			s.popParseState()
			return scanEndArray
		}
		return s.error(c, "after array element")
	}
	return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value,
// such as after reading `{}` or `[1,2,3]`.
// Only space characters should be seen now.
func stateEndTop(s *scanner, c byte) scanStatus {
	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
		// Complain about non-space byte on next call.
		s.error(c, "after top-level value")
	}
	return scanEnd
}
// stateInString is the state after reading `"`.
// It consumes string bytes until the closing quote or an escape sequence.
func stateInString(s *scanner, c byte) scanStatus {
	if c == '"' {
		s.step = stateEndValue
		return scanContinue
	}
	if c == '\\' {
		s.step = stateInStringEsc
		return scanContinue
	}
	if c < 0x20 {
		// Raw control characters are not allowed inside JSON strings.
		return s.error(c, "in string literal")
	}
	return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
func stateInStringEsc(s *scanner, c byte) scanStatus {
	switch c {
	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
		s.step = stateInString
		return scanContinue
	case 'u':
		s.step = stateInStringEscU
		return scanContinue
	}
	return s.error(c, "in string escape code")
}

// stateInStringEscU is the state after reading `"\u` during a quoted string.
func stateInStringEscU(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
		s.step = stateInStringEscU1
		return scanContinue
	}
	// not a hex digit
	return s.error(c, "in \\u hexadecimal character escape")
}

// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
func stateInStringEscU1(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
		s.step = stateInStringEscU12
		return scanContinue
	}
	// not a hex digit
	return s.error(c, "in \\u hexadecimal character escape")
}

// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
func stateInStringEscU12(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
		s.step = stateInStringEscU123
		return scanContinue
	}
	// not a hex digit
	return s.error(c, "in \\u hexadecimal character escape")
}

// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
func stateInStringEscU123(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
		s.step = stateInString
		return scanContinue
	}
	// not a hex digit
	return s.error(c, "in \\u hexadecimal character escape")
}
// stateNeg is the state after reading `-` during a number.
func stateNeg(s *scanner, c byte) scanStatus {
	if c == '0' {
		s.step = state0
		return scanContinue
	}
	if '1' <= c && c <= '9' {
		s.step = state1
		return scanContinue
	}
	return s.error(c, "in numeric literal")
}

// state1 is the state after reading a non-zero integer during a number,
// such as after reading `1` or `100` but not `0`.
func state1(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' {
		s.step = state1
		return scanContinue
	}
	return state0(s, c)
}

// state0 is the state after reading `0` during a number.
// It handles the optional fraction and exponent that may follow.
func state0(s *scanner, c byte) scanStatus {
	if c == '.' {
		s.step = stateDot
		return scanContinue
	}
	if c == 'e' || c == 'E' {
		s.step = stateE
		return scanContinue
	}
	return stateEndValue(s, c)
}

// stateDot is the state after reading the integer and decimal point in a number,
// such as after reading `1.`.
func stateDot(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' {
		s.step = stateDot0
		return scanContinue
	}
	return s.error(c, "after decimal point in numeric literal")
}

// stateDot0 is the state after reading the integer, decimal point, and subsequent
// digits of a number, such as after reading `3.14`.
func stateDot0(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' {
		return scanContinue
	}
	if c == 'e' || c == 'E' {
		s.step = stateE
		return scanContinue
	}
	return stateEndValue(s, c)
}

// stateE is the state after reading the mantissa and e in a number,
// such as after reading `314e` or `0.314e`.
func stateE(s *scanner, c byte) scanStatus {
	if c == '+' || c == '-' {
		s.step = stateESign
		return scanContinue
	}
	return stateESign(s, c)
}

// stateESign is the state after reading the mantissa, e, and sign in a number,
// such as after reading `314e-` or `0.314e+`.
func stateESign(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' {
		s.step = stateE0
		return scanContinue
	}
	return s.error(c, "in exponent of numeric literal")
}

// stateE0 is the state after reading the mantissa, e, optional sign,
// and at least one digit of the exponent in a number,
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
func stateE0(s *scanner, c byte) scanStatus {
	if '0' <= c && c <= '9' {
		return scanContinue
	}
	return stateEndValue(s, c)
}
// stateT is the state after reading `t`.
func stateT(s *scanner, c byte) scanStatus {
	if c == 'r' {
		s.step = stateTr
		return scanContinue
	}
	return s.error(c, "in literal true (expecting 'r')")
}

// stateTr is the state after reading `tr`.
func stateTr(s *scanner, c byte) scanStatus {
	if c == 'u' {
		s.step = stateTru
		return scanContinue
	}
	return s.error(c, "in literal true (expecting 'u')")
}

// stateTru is the state after reading `tru`.
func stateTru(s *scanner, c byte) scanStatus {
	if c == 'e' {
		s.step = stateEndValue
		return scanContinue
	}
	return s.error(c, "in literal true (expecting 'e')")
}

// stateF is the state after reading `f`.
func stateF(s *scanner, c byte) scanStatus {
	if c == 'a' {
		s.step = stateFa
		return scanContinue
	}
	return s.error(c, "in literal false (expecting 'a')")
}

// stateFa is the state after reading `fa`.
func stateFa(s *scanner, c byte) scanStatus {
	if c == 'l' {
		s.step = stateFal
		return scanContinue
	}
	return s.error(c, "in literal false (expecting 'l')")
}

// stateFal is the state after reading `fal`.
func stateFal(s *scanner, c byte) scanStatus {
	if c == 's' {
		s.step = stateFals
		return scanContinue
	}
	return s.error(c, "in literal false (expecting 's')")
}

// stateFals is the state after reading `fals`.
func stateFals(s *scanner, c byte) scanStatus {
	if c == 'e' {
		s.step = stateEndValue
		return scanContinue
	}
	return s.error(c, "in literal false (expecting 'e')")
}

// stateN is the state after reading `n`.
func stateN(s *scanner, c byte) scanStatus {
	if c == 'u' {
		s.step = stateNu
		return scanContinue
	}
	return s.error(c, "in literal null (expecting 'u')")
}

// stateNu is the state after reading `nu`.
func stateNu(s *scanner, c byte) scanStatus {
	if c == 'l' {
		s.step = stateNul
		return scanContinue
	}
	return s.error(c, "in literal null (expecting 'l')")
}

// stateNul is the state after reading `nul`.
func stateNul(s *scanner, c byte) scanStatus {
	if c == 'l' {
		s.step = stateEndValue
		return scanContinue
	}
	return s.error(c, "in literal null (expecting 'l')")
}
// stateError is the terminal state after a syntax error,
// such as after reading `[1}` or `5.1.2`. Every subsequent byte keeps
// reporting scanError; the details are already stored in s.err.
func stateError(s *scanner, c byte) scanStatus {
	return scanError
}
// error records the offending byte and its context in s.err, moves the
// scanner into the terminal error state, and reports scanError.
func (s *scanner) error(c byte, context string) scanStatus {
	s.err = fmt.Errorf("invalid character <<%c>> %s", c, context)
	s.step = stateError
	return scanError
}

View File

@@ -0,0 +1,478 @@
package json
import (
"bytes"
"sync"
)
const (
	// Query identifiers passed to Parse to select which key/value search,
	// if any, is run while validating the JSON.
	QueryNone = "json" // plain JSON validation, no key/value search
	QueryGeo = "geo"   // GeoJSON: "type" key with a GeoJSON object type value
	QueryHAR = "har"   // HAR: "log" object with version/creator/entries keys
	QueryGLTF = "gltf" // glTF JSON: "asset" object with version "1.0"/"2.0"
	// maxRecursion caps nesting depth in consumeAny to bound stack usage
	// on deeply nested (possibly hostile) input.
	maxRecursion = 4096
)
// queries maps each query identifier to the key/value searches run during
// parsing. querySatisfied is a single flag set when ANY one query's path
// (and, when SearchVals is non-empty, one of its values) is found, so the
// entries of a slice act as alternatives, not as a conjunction.
var queries = map[string][]query{
	QueryNone: nil,
	// GeoJSON: a "type" key whose value is one of the RFC 7946 object types.
	QueryGeo: {{
		SearchPath: [][]byte{[]byte("type")},
		SearchVals: [][]byte{
			[]byte(`"Feature"`),
			[]byte(`"FeatureCollection"`),
			[]byte(`"Point"`),
			[]byte(`"LineString"`),
			[]byte(`"Polygon"`),
			[]byte(`"MultiPoint"`),
			[]byte(`"MultiLineString"`),
			[]byte(`"MultiPolygon"`),
			[]byte(`"GeometryCollection"`),
		},
	}},
	// HAR: any of log.version, log.creator, log.entries present (value ignored).
	QueryHAR: {{
		SearchPath: [][]byte{[]byte("log"), []byte("version")},
	}, {
		SearchPath: [][]byte{[]byte("log"), []byte("creator")},
	}, {
		SearchPath: [][]byte{[]byte("log"), []byte("entries")},
	}},
	// glTF: asset.version must be exactly "1.0" or "2.0".
	QueryGLTF: {{
		SearchPath: [][]byte{[]byte("asset"), []byte("version")},
		SearchVals: [][]byte{[]byte(`"1.0"`), []byte(`"2.0"`)},
	}},
}
// parserPool recycles parserState values across Parse calls so that the
// currPath backing array is reused instead of reallocated per detection.
var parserPool = sync.Pool{
	New: func() any {
		return &parserState{maxRecursion: maxRecursion}
	},
}
// parserState holds the state of JSON parsing. The number of inspected bytes,
// the current path inside the JSON object, etc. Instances are reused via
// parserPool; reset must be called before each parse.
type parserState struct {
	// ib represents the number of inspected bytes.
	// Because mimetype limits itself to only reading the header of the file,
	// it means sometimes the input JSON can be truncated. In that case, we want
	// to still detect it as JSON, even if it's invalid/truncated.
	// When ib == len(input) it means the JSON was valid (at least the header).
	ib int
	// maxRecursion caps nesting depth; 0 disables the check (see consumeAny).
	maxRecursion int
	// currPath keeps a track of the JSON keys parsed up.
	// It works only for JSON objects. JSON arrays are ignored
	// mainly because the functionality is not needed.
	currPath [][]byte
	// firstToken stores the first JSON token encountered in input.
	// TODO: performance would be better if we would stop parsing as soon
	// as we see that first token is not what we are interested in.
	firstToken int
	// querySatisfied is true if both path and value of any queries passed to
	// consumeAny are satisfied.
	querySatisfied bool
}
// query holds information about a combination of {"key": "val"} that we're trying
// to search for inside the JSON.
type query struct {
	// SearchPath represents the whole path to look for inside the JSON.
	// ex: [][]byte{[]byte("foo"), []byte("bar")} matches {"foo": {"bar": "baz"}}
	SearchPath [][]byte
	// SearchVals represents values to look for when the SearchPath is found.
	// Each SearchVal element is tried until one of them matches (logical OR.)
	// An empty SearchVals means any value satisfies the query.
	SearchVals [][]byte
}
// eq reports whether the two paths have identical length and byte-equal
// segments at every position.
func eq(path1, path2 [][]byte) bool {
	if len(path1) != len(path2) {
		return false
	}
	for i, seg := range path1 {
		if !bytes.Equal(seg, path2[i]) {
			return false
		}
	}
	return true
}
// LooksLikeObjectOrArray reports whether the first non white space character
// from raw is either { or [. Parsing raw as JSON is a heavy operation; for
// plain text input this cheap pre-check lets callers skip it entirely.
func LooksLikeObjectOrArray(raw []byte) bool {
	for _, c := range raw {
		if isSpace(c) {
			continue
		}
		return c == '{' || c == '['
	}
	return false
}
// Parse borrows a parser from the pool and parses raw as JSON, running the
// key/value searches selected by queryType. It returns how many bytes were
// successfully parsed, how many were inspected, the first JSON token seen,
// and whether the query was satisfied.
func Parse(queryType string, raw []byte) (parsed, inspected, firstToken int, querySatisfied bool) {
	ps := parserPool.Get().(*parserState)
	defer func() {
		// Drop oversized path slices so the pool doesn't pin large memory.
		if len(ps.currPath) > 128 {
			ps.currPath = nil
		}
		parserPool.Put(ps)
	}()
	ps.reset()
	parsed = ps.consumeAny(raw, queries[queryType], 0)
	return parsed, ps.ib, ps.firstToken, ps.querySatisfied
}
// reset returns the parser to its initial state while keeping the currPath
// backing array for reuse across Parse calls.
func (p *parserState) reset() {
	p.ib = 0
	p.firstToken = TokInvalid
	p.querySatisfied = false
	p.currPath = p.currPath[:0]
}
// consumeSpace skips leading JSON whitespace in b and returns how many
// bytes were skipped; p.ib advances in lockstep.
func (p *parserState) consumeSpace(b []byte) (n int) {
	for n < len(b) && isSpace(b[n]) {
		n++
		p.ib++
	}
	return n
}
// consumeConst matches the literal cnst at the start of b, returning
// len(cnst) on success or 0 otherwise. p.ib counts every byte that matched,
// even on a partial/failed match, so truncation is still "inspected".
func (p *parserState) consumeConst(b, cnst []byte) int {
	for i := 0; i < len(cnst); i++ {
		if i >= len(b) || b[i] != cnst[i] {
			return 0
		}
		p.ib++
	}
	return len(cnst)
}
// consumeString consumes a JSON string from b, starting right AFTER the
// opening quote (the caller consumed it). It returns the number of bytes
// consumed including the closing quote, or 0 on malformed/truncated input.
// p.ib advances for every byte inspected, even on failure, so Parse can
// distinguish truncation (everything inspected) from a syntax error.
// NOTE: raw control characters inside the string are accepted — there is no
// check for them here; this parser is deliberately lenient.
func (p *parserState) consumeString(b []byte) (n int) {
	var c byte
	for len(b[n:]) > 0 {
		c, n = b[n], n+1
		p.ib++
		switch c {
		case '\\':
			// Escape sequence: at least one more byte must follow.
			if len(b[n:]) == 0 {
				return 0
			}
			switch b[n] {
			case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
				n++
				p.ib++
				continue
			case 'u':
				// \uXXXX: up to four hex digits. Fewer are tolerated when the
				// input ends early (truncated header), hence the len check.
				n++
				p.ib++
				for j := 0; j < 4 && len(b[n:]) > 0; j++ {
					if !isXDigit(b[n]) {
						return 0
					}
					n++
					p.ib++
				}
				continue
			default:
				// Unknown escape character.
				return 0
			}
		case '"':
			// Closing quote; n already counts it.
			return n
		default:
			continue
		}
	}
	// Input ended before the closing quote.
	return 0
}
// consumeNumber consumes a JSON number from b, returning how many bytes were
// consumed, or 0 when no digit was seen where one was required. p.ib
// advances for every byte inspected.
//
// NOTE(review): the grammar here is more lenient than RFC 8259. Because
// `got` is shared between the integer and fraction digit runs, forms like
// "5." and ".5" are accepted, and leading zeros are not rejected. Presumably
// intentional for sniffing truncated headers — confirm before tightening.
func (p *parserState) consumeNumber(b []byte) (n int) {
	// got records whether the digit run currently required has been seen.
	got := false
	var i int
	if len(b) == 0 {
		goto out
	}
	// Optional leading minus.
	if b[0] == '-' {
		b, i = b[1:], i+1
		p.ib++
	}
	// Integer digits.
	for len(b) > 0 {
		if !isDigit(b[0]) {
			break
		}
		got = true
		b, i = b[1:], i+1
		p.ib++
	}
	if len(b) == 0 {
		goto out
	}
	// Optional fraction.
	if b[0] == '.' {
		b, i = b[1:], i+1
		p.ib++
	}
	for len(b) > 0 {
		if !isDigit(b[0]) {
			break
		}
		got = true
		b, i = b[1:], i+1
		p.ib++
	}
	if len(b) == 0 {
		goto out
	}
	// Optional exponent, only valid after at least one mantissa digit.
	// got is reset: the exponent must contribute its own digits.
	if got && (b[0] == 'e' || b[0] == 'E') {
		b, i = b[1:], i+1
		p.ib++
		got = false
		if len(b) == 0 {
			goto out
		}
		if b[0] == '+' || b[0] == '-' {
			b, i = b[1:], i+1
			p.ib++
		}
		for len(b) > 0 {
			if !isDigit(b[0]) {
				break
			}
			got = true
			b, i = b[1:], i+1
			p.ib++
		}
	}
out:
	if got {
		return i
	}
	return 0
}
// consumeArray consumes a JSON array from b, starting right AFTER the
// opening '[' (the caller consumed it). It returns the number of bytes
// consumed including the closing ']', or 0 on malformed/truncated input.
// lvl is the current recursion depth, forwarded to consumeAny.
func (p *parserState) consumeArray(b []byte, qs []query, lvl int) (n int) {
	// Push a "[" placeholder segment; arrays are not query-addressable, but
	// the segment keeps object keys nested inside arrays from matching.
	p.appendPath([]byte{'['}, qs)
	if len(b) == 0 {
		return 0
	}
	for n < len(b) {
		n += p.consumeSpace(b[n:])
		if len(b[n:]) == 0 {
			return 0
		}
		// Empty array, or ']' reached at an element boundary.
		if b[n] == ']' {
			p.ib++
			p.popLastPath(qs)
			return n + 1
		}
		innerParsed := p.consumeAny(b[n:], qs, lvl)
		if innerParsed == 0 {
			return 0
		}
		n += innerParsed
		if len(b[n:]) == 0 {
			return 0
		}
		switch b[n] {
		case ',':
			n += 1
			p.ib++
			continue
		case ']':
			p.ib++
			// Fix: balance the appendPath done at the top of the function.
			// The original returned here without popping, leaving a stale
			// "[" segment in currPath and corrupting query path matching
			// for everything parsed after this array.
			p.popLastPath(qs)
			return n + 1
		default:
			return 0
		}
	}
	return 0
}
// queryPathMatch returns the index of the first query in qs whose SearchPath
// equals path, or -1 when none matches.
func queryPathMatch(qs []query, path [][]byte) int {
	for i, q := range qs {
		if eq(q.SearchPath, path) {
			return i
		}
	}
	return -1
}
// appendPath records a path fragment, but only when queries are in play.
// Without queries (plain validation) there is no need to track the current
// location, so the push is skipped entirely.
func (p *parserState) appendPath(path []byte, qs []query) {
	if len(qs) == 0 {
		return
	}
	p.currPath = append(p.currPath, path)
}
// popLastPath drops the most recently pushed path fragment; a no-op when no
// queries were requested, mirroring appendPath.
func (p *parserState) popLastPath(qs []query) {
	if len(qs) == 0 {
		return
	}
	p.currPath = p.currPath[:len(p.currPath)-1]
}
// consumeObject consumes a JSON object from b, starting right AFTER the
// opening '{' (the caller consumed it). It returns the number of bytes
// consumed including the closing '}', or 0 on malformed/truncated input.
// While parsing it pushes each key onto currPath and checks qs for a
// path+value match, setting p.querySatisfied when one is found.
// Failure paths (return 0) do not pop the current key; that is fine because
// the whole parse is aborted and Parse resets state before reuse.
func (p *parserState) consumeObject(b []byte, qs []query, lvl int) (n int) {
	for n < len(b) {
		n += p.consumeSpace(b[n:])
		if len(b[n:]) == 0 {
			return 0
		}
		// Empty object, or '}' reached at a member boundary.
		if b[n] == '}' {
			p.ib++
			return n + 1
		}
		// A member must start with a quoted key.
		if b[n] != '"' {
			return 0
		} else {
			n += 1
			p.ib++
		}
		// queryMatched stores the index of the query satisfying the current path.
		queryMatched := -1
		if keyLen := p.consumeString(b[n:]); keyLen == 0 {
			return 0
		} else {
			// keyLen includes the closing quote; strip it for the path segment.
			p.appendPath(b[n:n+keyLen-1], qs)
			// Only search while unsatisfied; first match wins.
			if !p.querySatisfied {
				queryMatched = queryPathMatch(qs, p.currPath)
			}
			n += keyLen
		}
		n += p.consumeSpace(b[n:])
		if len(b[n:]) == 0 {
			return 0
		}
		if b[n] != ':' {
			return 0
		} else {
			n += 1
			p.ib++
		}
		n += p.consumeSpace(b[n:])
		if len(b[n:]) == 0 {
			return 0
		}
		if valLen := p.consumeAny(b[n:], qs, lvl); valLen == 0 {
			return 0
		} else {
			if queryMatched != -1 {
				q := qs[queryMatched]
				// No expected values means the key's presence is enough.
				if len(q.SearchVals) == 0 {
					p.querySatisfied = true
				}
				// Otherwise compare the raw value text against each candidate.
				for _, val := range q.SearchVals {
					if bytes.Equal(val, bytes.TrimSpace(b[n:n+valLen])) {
						p.querySatisfied = true
					}
				}
			}
			n += valLen
		}
		if len(b[n:]) == 0 {
			return 0
		}
		switch b[n] {
		case ',':
			// Member done: pop its key and continue with the next one.
			p.popLastPath(qs)
			n++
			p.ib++
			continue
		case '}':
			p.popLastPath(qs)
			p.ib++
			return n + 1
		default:
			return 0
		}
	}
	return 0
}
// consumeAny consumes any single JSON value (string, array, object, literal,
// or number) from b, preceded by optional whitespace, and returns the number
// of bytes consumed (including trailing whitespace on success). lvl is the
// recursion depth; at lvl 0 the token kind is recorded as p.firstToken.
func (p *parserState) consumeAny(b []byte, qs []query, lvl int) (n int) {
	// Avoid too much recursion.
	if p.maxRecursion != 0 && lvl > p.maxRecursion {
		return 0
	}
	// With no queries to satisfy, plain validity is all that is asked for.
	if len(qs) == 0 {
		p.querySatisfied = true
	}
	n += p.consumeSpace(b)
	if len(b[n:]) == 0 {
		return 0
	}
	var t, rv int
	switch b[n] {
	case '"':
		n++
		p.ib++
		rv = p.consumeString(b[n:])
		t = TokString
	case '[':
		n++
		p.ib++
		rv = p.consumeArray(b[n:], qs, lvl+1)
		t = TokArray
	case '{':
		n++
		p.ib++
		rv = p.consumeObject(b[n:], qs, lvl+1)
		t = TokObject
	case 't':
		rv = p.consumeConst(b[n:], []byte("true"))
		t = TokTrue
	case 'f':
		rv = p.consumeConst(b[n:], []byte("false"))
		t = TokFalse
	case 'n':
		rv = p.consumeConst(b[n:], []byte("null"))
		t = TokNull
	default:
		rv = p.consumeNumber(b[n:])
		t = TokNumber
	}
	// The first token is recorded even when its parse fails (truncated input).
	if lvl == 0 {
		p.firstToken = t
	}
	// NOTE(review): on failure (rv <= 0) this returns n — the whitespace plus
	// any opening '"'/'['/'{' already consumed — rather than 0. Callers such
	// as consumeArray/consumeObject treat only 0 as failure, so a non-zero
	// partial count could be mistaken for success; confirm this is intended.
	if rv <= 0 {
		return n
	}
	n += rv
	n += p.consumeSpace(b[n:])
	return n
}
// isSpace reports whether c is one of the four JSON whitespace bytes.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return c >= '0' && c <= '9'
}
// isXDigit reports whether c is a hexadecimal digit (0-9, a-f, A-F).
func isXDigit(c byte) bool {
	switch {
	case isDigit(c):
		return true
	case 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// Token kinds reported by Parse as firstToken. Except for TokInvalid, each
// constant is a distinct bit (iota is 1 on the TokNull line, so TokNull == 2,
// TokTrue == 4, ... TokObject == 128), allowing callers to test membership
// with a mask, e.g. firstToken&(TokObject|TokArray) != 0.
const (
	TokInvalid = 0
	TokNull = 1 << iota
	TokTrue
	TokFalse
	TokNumber
	TokString
	TokArray
	TokObject
	TokComma
)

View File

@@ -137,7 +137,7 @@ func tarParseOctal(b []byte) int64 {
if b == 0 {
break
}
if !(b >= '0' && b <= '7') {
if b < '0' || b > '7' {
return -1
}
ret = (ret << 3) | int64(b-'0')

View File

@@ -71,7 +71,7 @@ func Dbf(raw []byte, limit uint32) bool {
}
// 3rd and 4th bytes contain the last update month and day of month.
if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) {
if raw[2] == 0 || raw[2] > 12 || raw[3] == 0 || raw[3] > 31 {
return false
}
@@ -153,7 +153,7 @@ func Marc(raw []byte, limit uint32) bool {
return bytes.Contains(raw[:min(2048, len(raw))], []byte{0x1E})
}
// Glb matches a glTF model format file.
// GLB matches a glTF model format file.
// GLB is the binary file format representation of 3D models saved in
// the GL transmission Format (glTF).
// GLB uses little endian and its header structure is as follows:
@@ -168,7 +168,7 @@ func Marc(raw []byte, limit uint32) bool {
//
// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf-binary
var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
var GLB = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"),
[]byte("\x67\x6C\x54\x46\x01\x00\x00\x00"))
// TzIf matches a Time Zone Information Format (TZif) file.

View File

@@ -1,18 +1,11 @@
package magic
import "bytes"
import (
"bytes"
"encoding/binary"
)
var (
// Pdf matches a Portable Document Format file.
// https://github.com/file/file/blob/11010cc805546a3e35597e67e1129a481aed40e8/magic/Magdir/pdf
Pdf = prefix(
// usual pdf signature
[]byte("%PDF-"),
// new-line prefixed signature
[]byte("\012%PDF-"),
// UTF-8 BOM prefixed signature
[]byte("\xef\xbb\xbf%PDF-"),
)
// Fdf matches a Forms Data Format file.
Fdf = prefix([]byte("%FDF"))
// Mobi matches a Mobi file.
@@ -21,8 +14,18 @@ var (
Lit = prefix([]byte("ITOLITLS"))
)
// PDF matches a Portable Document Format file.
// The %PDF- header should be the first thing inside the file but many
// implementations don't follow the rule. The PDF spec at Appendix H says the
// signature can be prepended by anything.
// https://bugs.astron.com/view.php?id=446
func PDF(raw []byte, _ uint32) bool {
raw = raw[:min(len(raw), 1024)]
return bytes.Contains(raw, []byte("%PDF-"))
}
// DjVu matches a DjVu file.
func DjVu(raw []byte, limit uint32) bool {
func DjVu(raw []byte, _ uint32) bool {
if len(raw) < 12 {
return false
}
@@ -36,7 +39,7 @@ func DjVu(raw []byte, limit uint32) bool {
}
// P7s matches an .p7s signature File (PEM, Base64).
func P7s(raw []byte, limit uint32) bool {
func P7s(raw []byte, _ uint32) bool {
// Check for PEM Encoding.
if bytes.HasPrefix(raw, []byte("-----BEGIN PKCS7")) {
return true
@@ -60,3 +63,21 @@ func P7s(raw []byte, limit uint32) bool {
return false
}
// Lotus123 matches a Lotus 1-2-3 spreadsheet document.
func Lotus123(raw []byte, _ uint32) bool {
if len(raw) <= 20 {
return false
}
version := binary.BigEndian.Uint32(raw)
if version == 0x00000200 {
return raw[6] != 0 && raw[7] == 0
}
return version == 0x00001a00 && raw[20] > 0 && raw[20] < 32
}
// CHM matches a Microsoft Compiled HTML Help file.
func CHM(raw []byte, _ uint32) bool {
return bytes.HasPrefix(raw, []byte("ITSF\003\000\000\000\x60\000\000\000"))
}

View File

@@ -12,13 +12,13 @@ func Shp(raw []byte, limit uint32) bool {
return false
}
if !(binary.BigEndian.Uint32(raw[0:4]) == 9994 &&
binary.BigEndian.Uint32(raw[4:8]) == 0 &&
binary.BigEndian.Uint32(raw[8:12]) == 0 &&
binary.BigEndian.Uint32(raw[12:16]) == 0 &&
binary.BigEndian.Uint32(raw[16:20]) == 0 &&
binary.BigEndian.Uint32(raw[20:24]) == 0 &&
binary.LittleEndian.Uint32(raw[28:32]) == 1000) {
if binary.BigEndian.Uint32(raw[0:4]) != 9994 ||
binary.BigEndian.Uint32(raw[4:8]) != 0 ||
binary.BigEndian.Uint32(raw[8:12]) != 0 ||
binary.BigEndian.Uint32(raw[12:16]) != 0 ||
binary.BigEndian.Uint32(raw[16:20]) != 0 ||
binary.BigEndian.Uint32(raw[20:24]) != 0 ||
binary.LittleEndian.Uint32(raw[28:32]) != 1000 {
return false
}

View File

@@ -4,6 +4,8 @@ package magic
import (
"bytes"
"fmt"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
type (
@@ -74,12 +76,13 @@ func ciCheck(sig, raw []byte) bool {
// matches the raw input.
func xml(sigs ...xmlSig) Detector {
return func(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
b := scan.Bytes(raw)
b.TrimLWS()
if len(b) == 0 {
return false
}
for _, s := range sigs {
if xmlCheck(s, raw) {
if xmlCheck(s, b) {
return true
}
}
@@ -104,19 +107,19 @@ func xmlCheck(sig xmlSig, raw []byte) bool {
// matches the raw input.
func markup(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) {
b := scan.Bytes(raw)
if bytes.HasPrefix(b, []byte{0xEF, 0xBB, 0xBF}) {
// We skip the UTF-8 BOM if present to ensure we correctly
// process any leading whitespace. The presence of the BOM
// is taken into account during charset detection in charset.go.
raw = trimLWS(raw[3:])
} else {
raw = trimLWS(raw)
b.Advance(3)
}
if len(raw) == 0 {
b.TrimLWS()
if len(b) == 0 {
return false
}
for _, s := range sigs {
if markupCheck(s, raw) {
if markupCheck(s, b) {
return true
}
}
@@ -139,7 +142,7 @@ func markupCheck(sig, raw []byte) bool {
}
}
// Next byte must be space or right angle bracket.
if db := raw[len(sig)]; db != ' ' && db != '>' {
if db := raw[len(sig)]; !scan.ByteIsWS(db) && db != '>' {
return false
}
@@ -183,8 +186,10 @@ func newXMLSig(localName, xmlns string) xmlSig {
// /usr/bin/env is the interpreter, php is the first and only argument.
func shebang(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
b := scan.Bytes(raw)
line := b.Line()
for _, s := range sigs {
if shebangCheck(s, firstLine(raw)) {
if shebangCheck(s, line) {
return true
}
}
@@ -192,7 +197,7 @@ func shebang(sigs ...[]byte) Detector {
}
}
func shebangCheck(sig, raw []byte) bool {
func shebangCheck(sig []byte, raw scan.Bytes) bool {
if len(raw) < len(sig)+2 {
return false
}
@@ -200,52 +205,8 @@ func shebangCheck(sig, raw []byte) bool {
return false
}
return bytes.Equal(trimLWS(trimRWS(raw[2:])), sig)
}
// trimLWS trims whitespace from beginning of the input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
// trimRWS trims whitespace from the end of the input.
func trimRWS(in []byte) []byte {
lastNonWS := len(in) - 1
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
}
return in[:lastNonWS+1]
}
func firstLine(in []byte) []byte {
lineEnd := 0
for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
}
return in[:lineEnd]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
type readBuf []byte
func (b *readBuf) advance(n int) bool {
if n < 0 || len(*b) < n {
return false
}
*b = (*b)[n:]
return true
raw.Advance(2) // skip #! we checked above
raw.TrimLWS()
raw.TrimRWS()
return bytes.Equal(raw, sig)
}

View File

@@ -7,17 +7,34 @@ import (
// Xlsx matches a Microsoft Excel 2007 file.
func Xlsx(raw []byte, limit uint32) bool {
return zipContains(raw, []byte("xl/"), true)
return msoxml(raw, zipEntries{{
name: []byte("xl/"),
dir: true,
}}, 100)
}
// Docx matches a Microsoft Word 2007 file.
func Docx(raw []byte, limit uint32) bool {
return zipContains(raw, []byte("word/"), true)
return msoxml(raw, zipEntries{{
name: []byte("word/"),
dir: true,
}}, 100)
}
// Pptx matches a Microsoft PowerPoint 2007 file.
func Pptx(raw []byte, limit uint32) bool {
return zipContains(raw, []byte("ppt/"), true)
return msoxml(raw, zipEntries{{
name: []byte("ppt/"),
dir: true,
}}, 100)
}
// Visio matches a Microsoft Visio 2013+ file.
func Visio(raw []byte, limit uint32) bool {
return msoxml(raw, zipEntries{{
name: []byte("visio/"),
dir: true,
}}, 100)
}
// Ole matches an Open Linking and Embedding file.
@@ -157,6 +174,14 @@ func Msi(raw []byte, limit uint32) bool {
})
}
// One matches a Microsoft OneNote file.
func One(raw []byte, limit uint32) bool {
return bytes.HasPrefix(raw, []byte{
0xe4, 0x52, 0x5c, 0x7b, 0x8c, 0xd8, 0xa7, 0x4d,
0xae, 0xb1, 0x53, 0x78, 0xd0, 0x29, 0x96, 0xd3,
})
}
// Helper to match by a specific CLSID of a compound file.
//
// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File

View File

@@ -0,0 +1,111 @@
package magic
import (
"bytes"
"strconv"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
// NetPBM matches a Netpbm Portable BitMap ASCII/Binary file.
// "P1" is the plain (ASCII) magic, "P4" the raw (binary) magic.
//
// See: https://en.wikipedia.org/wiki/Netpbm
func NetPBM(raw []byte, _ uint32) bool {
	return netp(raw, "P1\n", "P4\n")
}
// NetPGM matches a Netpbm Portable GrayMap ASCII/Binary file.
// "P2" is the plain (ASCII) magic, "P5" the raw (binary) magic.
//
// See: https://en.wikipedia.org/wiki/Netpbm
func NetPGM(raw []byte, _ uint32) bool {
	return netp(raw, "P2\n", "P5\n")
}
// NetPPM matches a Netpbm Portable PixMap ASCII/Binary file.
// "P3" is the plain (ASCII) magic, "P6" the raw (binary) magic.
//
// See: https://en.wikipedia.org/wiki/Netpbm
func NetPPM(raw []byte, _ uint32) bool {
	return netp(raw, "P3\n", "P6\n")
}
// NetPAM matches a Netpbm Portable Arbitrary Map file: a "P7" magic followed
// by a header that must contain WIDTH, HEIGHT, DEPTH and MAXVAL lines before
// an ENDHDR line. At most 128 header lines are examined so hostile input
// cannot make this scan unbounded.
//
// See: https://en.wikipedia.org/wiki/Netpbm
func NetPAM(raw []byte, _ uint32) bool {
	if !bytes.HasPrefix(raw, []byte("P7\n")) {
		return false
	}
	// Flags for the required WIDTH/HEIGHT/DEPTH/MAXVAL headers and ENDHDR.
	w, h, d, m, e := false, false, false, false, false
	s := scan.Bytes(raw)
	var l scan.Bytes
	// Read line by line. NOTE(review): s.Line() presumably pops one line off
	// s, leaving the remainder in s — confirm against the scan package.
	for i := 0; i < 128; i++ {
		l = s.Line()
		// If the line is empty or a comment, skip; fail once input runs out.
		if len(l) == 0 || l.Peek() == '#' {
			if len(s) == 0 {
				return false
			}
			continue
		} else if bytes.HasPrefix(l, []byte("TUPLTYPE")) {
			// TUPLTYPE is optional and its value is irrelevant here.
			continue
		} else if bytes.HasPrefix(l, []byte("WIDTH ")) {
			w = true
		} else if bytes.HasPrefix(l, []byte("HEIGHT ")) {
			h = true
		} else if bytes.HasPrefix(l, []byte("DEPTH ")) {
			d = true
		} else if bytes.HasPrefix(l, []byte("MAXVAL ")) {
			m = true
		} else if bytes.HasPrefix(l, []byte("ENDHDR")) {
			e = true
		}
		// When we reach ENDHDR, return true only if all four required headers
		// (WIDTH, HEIGHT, DEPTH and MAXVAL) were collected.
		if e {
			return w && h && d && m
		}
	}
	return false
}
// netp reports whether s is a Netpbm PBM/PGM/PPM image: it must start with
// one of the given magic prefixes (e.g. "P1\n", "P4\n") and, after skipping
// comment lines, contain a dimensions line with two positive integers
// (width and height). At most 128 lines are examined.
func netp(s scan.Bytes, prefixes ...string) bool {
	foundPrefix := ""
	for _, p := range prefixes {
		if bytes.HasPrefix(s, []byte(p)) {
			foundPrefix = p
		}
	}
	if foundPrefix == "" {
		return false
	}
	s.Advance(len(foundPrefix)) // jump over P1, P2, P3, etc.
	var l scan.Bytes
	// Read line by line until a non-comment, non-empty line is found.
	for i := 0; i < 128; i++ {
		l = s.Line()
		// If the line is a comment, skip.
		if l.Peek() == '#' {
			continue
		}
		// If line has leading whitespace, then skip over whitespace.
		for scan.ByteIsWS(l.Peek()) {
			l.Advance(1)
		}
		// Stop on the first line with content, or when input is exhausted.
		if len(s) == 0 || len(l) > 0 {
			break
		}
	}
	// At this point l should be the two integers denoting the size of the matrix.
	width := l.PopUntil(scan.ASCIISpaces...)
	for scan.ByteIsWS(l.Peek()) {
		l.Advance(1)
	}
	height := l.PopUntil(scan.ASCIISpaces...)
	// Both dimensions must parse as positive integers.
	w, errw := strconv.ParseInt(string(width), 10, 64)
	h, errh := strconv.ParseInt(string(height), 10, 64)
	return errw == nil && errh == nil && w > 0 && h > 0
}

View File

@@ -2,11 +2,12 @@ package magic
import (
"bytes"
"strings"
"time"
"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/json"
mkup "github.com/gabriel-vasile/mimetype/internal/markup"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
var (
@@ -28,6 +29,7 @@ var (
[]byte("<BODY"),
[]byte("<BR"),
[]byte("<P"),
[]byte("<!--"),
)
// XML matches an Extensible Markup Language file.
XML = markup([]byte("<?XML"))
@@ -106,6 +108,18 @@ var (
[]byte("/usr/bin/python"),
[]byte("/usr/local/bin/python"),
[]byte("/usr/bin/env python"),
[]byte("/usr/bin/python2"),
[]byte("/usr/local/bin/python2"),
[]byte("/usr/bin/env python2"),
[]byte("/usr/bin/python3"),
[]byte("/usr/local/bin/python3"),
[]byte("/usr/bin/env python3"),
)
// Ruby matches a Ruby programming language file.
Ruby = shebang(
[]byte("/usr/bin/ruby"),
[]byte("/usr/local/bin/ruby"),
[]byte("/usr/bin/env ruby"),
)
// Tcl matches a Tcl programming language file.
Tcl = shebang(
@@ -121,19 +135,42 @@ var (
)
// Rtf matches a Rich Text Format file.
Rtf = prefix([]byte("{\\rtf"))
// Shell matches a shell script file.
Shell = shebang(
[]byte("/bin/sh"),
[]byte("/bin/bash"),
[]byte("/usr/local/bin/bash"),
[]byte("/usr/bin/env bash"),
[]byte("/bin/csh"),
[]byte("/usr/local/bin/csh"),
[]byte("/usr/bin/env csh"),
[]byte("/bin/dash"),
[]byte("/usr/local/bin/dash"),
[]byte("/usr/bin/env dash"),
[]byte("/bin/ksh"),
[]byte("/usr/local/bin/ksh"),
[]byte("/usr/bin/env ksh"),
[]byte("/bin/tcsh"),
[]byte("/usr/local/bin/tcsh"),
[]byte("/usr/bin/env tcsh"),
[]byte("/bin/zsh"),
[]byte("/usr/local/bin/zsh"),
[]byte("/usr/bin/env zsh"),
)
)
// Text matches a plain text file.
//
// TODO: This function does not parse BOM-less UTF16 and UTF32 files. Not really
// sure it should. Linux file utility also requires a BOM for UTF16 and UTF32.
func Text(raw []byte, limit uint32) bool {
func Text(raw []byte, _ uint32) bool {
// First look for BOM.
if cset := charset.FromBOM(raw); cset != "" {
return true
}
// Binary data bytes as defined here: https://mimesniff.spec.whatwg.org/#binary-data-byte
for _, b := range raw {
for i := 0; i < min(len(raw), 4096); i++ {
b := raw[i]
if b <= 0x08 ||
b == 0x0B ||
0x0E <= b && b <= 0x1A ||
@@ -144,6 +181,14 @@ func Text(raw []byte, limit uint32) bool {
return true
}
// XHTML matches an XHTML file. This check depends on the XML check to have passed.
func XHTML(raw []byte, limit uint32) bool {
raw = raw[:min(len(raw), 4096)]
b := scan.Bytes(raw)
return b.Search([]byte("<!DOCTYPE HTML"), scan.CompactWS|scan.IgnoreCase) != -1 ||
b.Search([]byte("<HTML XMLNS="), scan.CompactWS|scan.IgnoreCase) != -1
}
// Php matches a PHP: Hypertext Preprocessor file.
func Php(raw []byte, limit uint32) bool {
if res := phpPageF(raw, limit); res {
@@ -154,183 +199,180 @@ func Php(raw []byte, limit uint32) bool {
// JSON matches a JavaScript Object Notation file.
func JSON(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
// #175 A single JSON string, number or bool is not considered JSON.
// JSON objects and arrays are reported as JSON.
if len(raw) < 2 || (raw[0] != '[' && raw[0] != '{') {
return false
}
parsed, err := json.Scan(raw)
// If the full file content was provided, check there is no error.
if limit == 0 || len(raw) < int(limit) {
return err == nil
}
// If a section of the file was provided, check if all of it was parsed.
return parsed == len(raw) && len(raw) > 0
return jsonHelper(raw, limit, json.QueryNone, json.TokObject|json.TokArray)
}
// GeoJSON matches a RFC 7946 GeoJSON file.
//
// GeoJSON detection implies searching for key:value pairs like: `"type": "Feature"`
// in the input.
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJSON(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if len(raw) == 0 {
return jsonHelper(raw, limit, json.QueryGeo, json.TokObject)
}
// HAR matches a HAR Spec file.
// Spec: http://www.softwareishard.com/blog/har-12-spec/
func HAR(raw []byte, limit uint32) bool {
return jsonHelper(raw, limit, json.QueryHAR, json.TokObject)
}
// GLTF matches a GL Transmission Format (JSON) file.
// Visit [glTF specification] and [IANA glTF entry] for more details.
//
// [glTF specification]: https://registry.khronos.org/glTF/specs/2.0/glTF-2.0.html
// [IANA glTF entry]: https://www.iana.org/assignments/media-types/model/gltf+json
func GLTF(raw []byte, limit uint32) bool {
return jsonHelper(raw, limit, json.QueryGLTF, json.TokObject)
}
func jsonHelper(raw []byte, limit uint32, q string, wantTok int) bool {
if !json.LooksLikeObjectOrArray(raw) {
return false
}
// GeoJSON is always a JSON object, not a JSON array or any other JSON value.
if raw[0] != '{' {
lraw := len(raw)
parsed, inspected, firstToken, querySatisfied := json.Parse(q, raw)
if !querySatisfied || firstToken&wantTok == 0 {
return false
}
s := []byte(`"type"`)
si, sl := bytes.Index(raw, s), len(s)
if si == -1 {
return false
// If the full file content was provided, check that the whole input was parsed.
if limit == 0 || lraw < int(limit) {
return parsed == lraw
}
// If the "type" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(raw) {
return false
}
// Skip the "type" part.
raw = raw[si+sl:]
// Skip any whitespace before the colon.
raw = trimLWS(raw)
// Check for colon.
if len(raw) == 0 || raw[0] != ':' {
return false
}
// Skip any whitespace after the colon.
raw = trimLWS(raw[1:])
geoJSONTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJSONTypes {
if bytes.HasPrefix(raw, t) {
return true
}
}
return false
// If a section of the file was provided, check if all of it was inspected.
// In other words, check that if there was a problem parsing, that problem
// occured at the last byte in the input.
return inspected == lraw && lraw > 0
}
// NdJSON matches a Newline delimited JSON file. All complete lines from raw
// must be valid JSON documents meaning they contain one of the valid JSON data
// types.
func NdJSON(raw []byte, limit uint32) bool {
lCount, hasObjOrArr := 0, false
raw = dropLastLine(raw, limit)
var l []byte
for len(raw) != 0 {
l, raw = scanLine(raw)
// Empty lines are allowed in NDJSON.
if l = trimRWS(trimLWS(l)); len(l) == 0 {
continue
}
_, err := json.Scan(l)
if err != nil {
lCount, objOrArr := 0, 0
s := scan.Bytes(raw)
s.DropLastLine(limit)
var l scan.Bytes
for len(s) != 0 {
l = s.Line()
_, inspected, firstToken, _ := json.Parse(json.QueryNone, l)
if len(l) != inspected {
return false
}
if l[0] == '[' || l[0] == '{' {
hasObjOrArr = true
if firstToken == json.TokArray || firstToken == json.TokObject {
objOrArr++
}
lCount++
}
return lCount > 1 && hasObjOrArr
}
// HAR matches a HAR Spec file.
// Spec: http://www.softwareishard.com/blog/har-12-spec/
func HAR(raw []byte, limit uint32) bool {
s := []byte(`"log"`)
si, sl := bytes.Index(raw, s), len(s)
if si == -1 {
return false
}
// If the "log" string is the suffix of the input,
// there is no need to search for the value of the key.
if si+sl == len(raw) {
return false
}
// Skip the "log" part.
raw = raw[si+sl:]
// Skip any whitespace before the colon.
raw = trimLWS(raw)
// Check for colon.
if len(raw) == 0 || raw[0] != ':' {
return false
}
// Skip any whitespace after the colon.
raw = trimLWS(raw[1:])
harJSONTypes := [][]byte{
[]byte(`"version"`),
[]byte(`"creator"`),
[]byte(`"entries"`),
}
for _, t := range harJSONTypes {
si := bytes.Index(raw, t)
if si > -1 {
return true
}
}
return false
return lCount > 1 && objOrArr > 0
}
// Svg matches a SVG file.
func Svg(raw []byte, limit uint32) bool {
return bytes.Contains(raw, []byte("<svg"))
return svgWithoutXMLDeclaration(raw) || svgWithXMLDeclaration(raw)
}
// svgWithoutXMLDeclaration matches a SVG image that does not have an XML header.
// Example:
//
// <!-- xml comment ignored -->
// <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
// <rect fill="#fff" stroke="#000" x="-70" y="-70" width="390" height="390"/>
// </svg>
func svgWithoutXMLDeclaration(s scan.Bytes) bool {
for scan.ByteIsWS(s.Peek()) {
s.Advance(1)
}
for mkup.SkipAComment(&s) {
}
if !bytes.HasPrefix(s, []byte("<svg")) {
return false
}
targetName, targetVal := "xmlns", "http://www.w3.org/2000/svg"
aName, aVal, hasMore := "", "", true
for hasMore {
aName, aVal, hasMore = mkup.GetAnAttribute(&s)
if aName == targetName && aVal == targetVal {
return true
}
if !hasMore {
return false
}
}
return false
}
// svgWithXMLDeclaration matches a SVG image that has an XML header.
// Example:
//
// <?xml version="1.0" encoding="UTF-8" standalone="no"?>
// <svg width="391" height="391" viewBox="-70.5 -70.5 391 391" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
// <rect fill="#fff" stroke="#000" x="-70" y="-70" width="390" height="390"/>
// </svg>
func svgWithXMLDeclaration(s scan.Bytes) bool {
for scan.ByteIsWS(s.Peek()) {
s.Advance(1)
}
if !bytes.HasPrefix(s, []byte("<?xml")) {
return false
}
// version is a required attribute for XML.
hasVersion := false
aName, hasMore := "", true
for hasMore {
aName, _, hasMore = mkup.GetAnAttribute(&s)
if aName == "version" {
hasVersion = true
break
}
if !hasMore {
break
}
}
if len(s) > 4096 {
s = s[:4096]
}
return hasVersion && bytes.Contains(s, []byte("<svg"))
}
// Srt matches a SubRip file.
func Srt(raw []byte, _ uint32) bool {
line, raw := scanLine(raw)
s := scan.Bytes(raw)
line := s.Line()
// First line must be 1.
if string(line) != "1" {
if len(line) != 1 || line[0] != '1' {
return false
}
line, raw = scanLine(raw)
secondLine := string(line)
// Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine
line = s.Line()
// Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits second line
// length to exactly 29 characters.
if len(secondLine) != 29 {
if len(line) != 29 {
return false
}
// Decimal separator of fractional seconds in the timestamps must be a
// comma, not a period.
if strings.Contains(secondLine, ".") {
if bytes.IndexByte(line, '.') != -1 {
return false
}
// Second line must be a time range.
ts := strings.Split(secondLine, " --> ")
if len(ts) != 2 {
sep := []byte(" --> ")
i := bytes.Index(line, sep)
if i == -1 {
return false
}
const layout = "15:04:05,000"
t0, err := time.Parse(layout, ts[0])
t0, err := time.Parse(layout, string(line[:i]))
if err != nil {
return false
}
t1, err := time.Parse(layout, ts[1])
t1, err := time.Parse(layout, string(line[i+len(sep):]))
if err != nil {
return false
}
@@ -338,7 +380,7 @@ func Srt(raw []byte, _ uint32) bool {
return false
}
line, _ = scanLine(raw)
line = s.Line()
// A third line must exist and not be empty. This is the actual subtitle text.
return len(line) != 0
}
@@ -367,15 +409,3 @@ func Vtt(raw []byte, limit uint32) bool {
return bytes.Equal(raw, []byte{0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) || // UTF-8 BOM and "WEBVTT"
bytes.Equal(raw, []byte{0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) // "WEBVTT"
}
// dropCR removes a single trailing '\r' from data, if present.
func dropCR(data []byte) []byte {
	if l := len(data); l > 0 && data[l-1] == '\r' {
		return data[:l-1]
	}
	return data
}

// scanLine splits b at the first newline, returning the line with any
// trailing carriage return removed, plus whatever follows the newline.
func scanLine(b []byte) (line, remainder []byte) {
	before, after, _ := bytes.Cut(b, []byte{'\n'})
	return dropCR(before), after
}

View File

@@ -1,77 +1,43 @@
package magic
import (
"bufio"
"bytes"
"encoding/csv"
"errors"
"io"
"sync"
"github.com/gabriel-vasile/mimetype/internal/csv"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
// A bufio.Reader pool to alleviate problems with memory allocations.
var readerPool = sync.Pool{
New: func() any {
// Initiate with empty source reader.
return bufio.NewReader(nil)
},
}
func newReader(r io.Reader) *bufio.Reader {
br := readerPool.Get().(*bufio.Reader)
br.Reset(r)
return br
}
// Csv matches a comma-separated values file.
func Csv(raw []byte, limit uint32) bool {
// CSV matches a comma-separated values file.
func CSV(raw []byte, limit uint32) bool {
return sv(raw, ',', limit)
}
// Tsv matches a tab-separated values file.
func Tsv(raw []byte, limit uint32) bool {
// TSV matches a tab-separated values file.
func TSV(raw []byte, limit uint32) bool {
return sv(raw, '\t', limit)
}
func sv(in []byte, comma rune, limit uint32) bool {
in = dropLastLine(in, limit)
func sv(in []byte, comma byte, limit uint32) bool {
s := scan.Bytes(in)
s.DropLastLine(limit)
r := csv.NewParser(comma, '#', s)
br := newReader(bytes.NewReader(in))
defer readerPool.Put(br)
r := csv.NewReader(br)
r.Comma = comma
r.ReuseRecord = true
r.LazyQuotes = true
r.Comment = '#'
lines := 0
headerFields, _, hasMore := r.CountFields(false)
if headerFields < 2 || !hasMore {
return false
}
csvLines := 1 // 1 for header
for {
_, err := r.Read()
if errors.Is(err, io.EOF) {
fields, _, hasMore := r.CountFields(false)
if !hasMore && fields == 0 {
break
}
if err != nil {
csvLines++
if fields != headerFields {
return false
}
lines++
}
return r.FieldsPerRecord > 1 && lines > 1
}
// dropLastLine drops the last incomplete line from b.
//
// mimetype limits itself to ReadLimit bytes when performing a detection.
// This means, for file formats like CSV for NDJSON, the last line of the input
// can be an incomplete line.
func dropLastLine(b []byte, readLimit uint32) []byte {
if readLimit == 0 || uint32(len(b)) < readLimit {
return b
}
for i := len(b) - 1; i > 0; i-- {
if b[i] == '\n' {
return b[:i]
if csvLines >= 10 {
return true
}
}
return b
return csvLines >= 2
}

View File

@@ -2,7 +2,8 @@ package magic
import (
"bytes"
"encoding/binary"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
var (
@@ -40,92 +41,149 @@ func Zip(raw []byte, limit uint32) bool {
(raw[3] == 0x4 || raw[3] == 0x6 || raw[3] == 0x8)
}
// Jar matches a Java archive file.
// Jar matches a Java archive file. There are two types of Jar files:
// 1. the ones that can be opened with jexec and have 0xCAFE optional flag
// https://stackoverflow.com/tags/executable-jar/info
// 2. regular jars, same as above, just without the executable flag
// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=262278#c0
// There is an argument to only check for manifest, since it's the common nominator
// for both executable and non-executable versions. But the traversing zip entries
// is unreliable because it does linear search for signatures
// (instead of relying on offsets told by the file.)
func Jar(raw []byte, limit uint32) bool {
return zipContains(raw, []byte("META-INF/MANIFEST.MF"), false)
return executableJar(raw) ||
zipHas(raw, zipEntries{{
name: []byte("META-INF/MANIFEST.MF"),
}, {
name: []byte("META-INF/"),
}}, 1)
}
func zipContains(raw, sig []byte, msoCheck bool) bool {
b := readBuf(raw)
pk := []byte("PK\003\004")
if len(b) < 0x1E {
// KMZ matches a zipped KML file, which is "doc.kml" by convention.
func KMZ(raw []byte, _ uint32) bool {
return zipHas(raw, zipEntries{{
name: []byte("doc.kml"),
}}, 100)
}
// An executable Jar has a 0xCAFE flag enabled in the first zip entry.
// The rule from file/file is:
// >(26.s+30) leshort 0xcafe Java archive data (JAR)
func executableJar(b scan.Bytes) bool {
b.Advance(0x1A)
offset, ok := b.Uint16()
if !ok {
return false
}
b.Advance(int(offset) + 2)
if !b.advance(0x1E) {
return false
}
if bytes.HasPrefix(b, sig) {
return true
}
cafe, ok := b.Uint16()
return ok && cafe == 0xCAFE
}
if msoCheck {
skipFiles := [][]byte{
[]byte("[Content_Types].xml"),
[]byte("_rels/.rels"),
[]byte("docProps"),
[]byte("customXml"),
[]byte("[trash]"),
}
// zipIterator iterates over a zip file returning the name of the zip entries
// in that file.
type zipIterator struct {
b scan.Bytes
}
hasSkipFile := false
for _, sf := range skipFiles {
if bytes.HasPrefix(b, sf) {
hasSkipFile = true
break
}
}
if !hasSkipFile {
return false
}
}
type zipEntries []struct {
name []byte
dir bool // dir means checking just the prefix of the entry, not the whole path
}
searchOffset := binary.LittleEndian.Uint32(raw[18:]) + 49
if !b.advance(int(searchOffset)) {
return false
}
nextHeader := bytes.Index(raw[searchOffset:], pk)
if !b.advance(nextHeader) {
return false
}
if bytes.HasPrefix(b, sig) {
return true
}
for i := 0; i < 4; i++ {
if !b.advance(0x1A) {
return false
func (z zipEntries) match(file []byte) bool {
for i := range z {
if z[i].dir && bytes.HasPrefix(file, z[i].name) {
return true
}
nextHeader = bytes.Index(b, pk)
if nextHeader == -1 {
return false
}
if !b.advance(nextHeader + 0x1E) {
return false
}
if bytes.HasPrefix(b, sig) {
if bytes.Equal(file, z[i].name) {
return true
}
}
return false
}
// zipHas reports whether any of the first stopAfter entries of the zip
// archive raw matches one of the searchFor entries.
func zipHas(raw scan.Bytes, searchFor zipEntries, stopAfter int) bool {
	it := zipIterator{raw}
	for n := 0; n < stopAfter; n++ {
		entry := it.next()
		// No more entries to inspect.
		if len(entry) == 0 {
			return false
		}
		if searchFor.match(entry) {
			return true
		}
	}
	return false
}
// msoxml behaves like zipHas, but it puts restrictions on what the first
// zip entry can be: scanning aborts unless the first entry either matches
// searchFor directly or is one of the entries usually found at the start
// of MS Office OOXML files.
func msoxml(raw scan.Bytes, searchFor zipEntries, stopAfter int) bool {
	it := zipIterator{raw}
	for n := 0; n < stopAfter; n++ {
		entry := it.next()
		if len(entry) == 0 {
			return false
		}
		if searchFor.match(entry) {
			return true
		}
		if n != 0 {
			continue
		}
		// The first entry did not match directly; it must at least be
		// one of the usually expected leading entries.
		expectedFirst := [][]byte{
			[]byte("[Content_Types].xml"),
			[]byte("_rels/.rels"),
			[]byte("docProps"),
			[]byte("customXml"),
			[]byte("[trash]"),
		}
		allowed := false
		for _, e := range expectedFirst {
			if bytes.Equal(entry, e) {
				allowed = true
				break
			}
		}
		if !allowed {
			return false
		}
	}
	return false
}
// next extracts the name of the next zip entry.
// It scans forward to the next local file header signature ("PK\3\4") and
// returns the entry's file name, or nil when no further complete header is
// available.
func (i *zipIterator) next() []byte {
	// Local file header signature.
	pk := []byte("PK\003\004")
	n := bytes.Index(i.b, pk)
	if n == -1 {
		return nil
	}
	i.b.Advance(n)
	// Skip to the file name length field, 26 bytes into the header.
	if !i.b.Advance(0x1A) {
		return nil
	}
	l, ok := i.b.Uint16()
	if !ok {
		return nil
	}
	// Skip the 2-byte extra field length to land on the file name itself.
	if !i.b.Advance(0x02) {
		return nil
	}
	// Truncated header: the advertised name length exceeds what is left.
	if len(i.b) < int(l) {
		return nil
	}
	return i.b[:l]
}
// APK matches an Android Package Archive.
// The source of signatures is https://github.com/file/file/blob/1778642b8ba3d947a779a36fcd81f8e807220a19/magic/Magdir/archive#L1820-L1887
func APK(raw []byte, _ uint32) bool {
apkSignatures := [][]byte{
[]byte("AndroidManifest.xml"),
[]byte("META-INF/com/android/build/gradle/app-metadata.properties"),
[]byte("classes.dex"),
[]byte("resources.arsc"),
[]byte("res/drawable"),
}
for _, sig := range apkSignatures {
if zipContains(raw, sig, false) {
return true
}
}
return false
return zipHas(raw, zipEntries{{
name: []byte("AndroidManifest.xml"),
}, {
name: []byte("META-INF/com/android/build/gradle/app-metadata.properties"),
}, {
name: []byte("classes.dex"),
}, {
name: []byte("resources.arsc"),
}, {
name: []byte("res/drawable"),
}}, 100)
}

View File

@@ -0,0 +1,103 @@
// Package markup implements functions for extracting info from
// HTML and XML documents.
package markup
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/scan"
)
// GetAnAttribute extracts the next attribute name and value from s,
// advancing s past what was consumed. hasMore reports whether more
// attributes may follow. ASCII uppercase letters in the name are
// lowercased. The step comments below refer to the attribute-parsing
// algorithm of the HTML specification.
func GetAnAttribute(s *scan.Bytes) (name, val string, hasMore bool) {
	// Skip whitespace and stray slashes before the attribute name.
	for scan.ByteIsWS(s.Peek()) || s.Peek() == '/' {
		s.Advance(1)
	}
	// End of the tag: no attribute to report.
	if s.Peek() == '>' {
		return "", "", false
	}
	// Allocate 10 to avoid resizes.
	// Attribute names and values are continuous slices of bytes in input,
	// so we could do without allocating and returning slices of input.
	nameB := make([]byte, 0, 10)
	// step 4 and 5
	for {
		// bap means byte at position in the specification.
		bap := s.Pop()
		// Input exhausted before the attribute was terminated.
		if bap == 0 {
			return "", "", false
		}
		if bap == '=' && len(nameB) > 0 {
			// name=value with no whitespace around '='.
			val, hasMore := getAValue(s)
			return string(nameB), string(val), hasMore
		} else if scan.ByteIsWS(bap) {
			// Whitespace after the name: either a standalone attribute
			// or an '=' follows after more whitespace.
			for scan.ByteIsWS(s.Peek()) {
				s.Advance(1)
			}
			if s.Peek() != '=' {
				// Attribute without a value.
				return string(nameB), "", true
			}
			s.Advance(1)
			for scan.ByteIsWS(s.Peek()) {
				s.Advance(1)
			}
			val, hasMore := getAValue(s)
			return string(nameB), string(val), hasMore
		} else if bap == '/' || bap == '>' {
			// Tag ended while reading the name: value-less attribute
			// and nothing more to scan.
			return string(nameB), "", false
		} else if bap >= 'A' && bap <= 'Z' {
			// Lowercase ASCII uppercase letters in the name.
			nameB = append(nameB, bap+0x20)
		} else {
			nameB = append(nameB, bap)
		}
	}
}
// getAValue extracts an attribute value from s, advancing s past it. The
// value is either quoted with single or double quotes, or a bare token
// terminated by whitespace or '>'. hasMore reports whether scanning can
// continue after the value (the tag was not closed and input remains).
func getAValue(s *scan.Bytes) (_ []byte, hasMore bool) {
	// Skip whitespace preceding the value.
	for scan.ByteIsWS(s.Peek()) {
		s.Advance(1)
	}
	// origS remembers where the value starts; end counts its bytes.
	origS, end := *s, 0
	bap := s.Pop()
	// Input exhausted: no value.
	if bap == 0 {
		return nil, false
	}
	end++
	// Step 10
	switch bap {
	case '"', '\'':
		// Quoted value: everything up to the matching quote.
		val := s.PopUntil(bap)
		// Unterminated quote.
		if s.Pop() != bap {
			return nil, false
		}
		return val, s.Peek() != 0 && s.Peek() != '>'
	case '>':
		return nil, false
	}
	// Step 11: unquoted value, terminated by whitespace or '>'.
	for {
		bap = s.Pop()
		// Input ran out before the value terminated.
		if bap == 0 {
			return nil, false
		}
		switch {
		case scan.ByteIsWS(bap):
			return origS[:end], true
		case bap == '>':
			return origS[:end], false
		default:
			end++
		}
	}
}
// SkipAComment advances s past an HTML comment ("<!--" ... "-->") when s
// starts with one, reporting whether anything was skipped.
func SkipAComment(s *scan.Bytes) (skipped bool) {
	if !bytes.HasPrefix(*s, []byte("<!--")) {
		return false
	}
	// Search starting right after "<!" so the opening and closing "--"
	// may overlap, as in "<!-->".
	i := bytes.Index((*s)[2:], []byte("-->"))
	if i == -1 {
		return false
	}
	s.Advance(i + 2 + 3) // 2 for len("<!"), 3 for len("-->").
	return true
}

View File

@@ -0,0 +1,213 @@
// Package scan has functions for scanning byte slices.
package scan
import (
"bytes"
"encoding/binary"
)
// Bytes is a byte slice with helper methods for easier scanning.
type Bytes []byte

// Advance drops the first n bytes of b. It reports false and leaves b
// unchanged when n is negative or exceeds the remaining length.
func (b *Bytes) Advance(n int) bool {
	if n < 0 || n > len(*b) {
		return false
	}
	*b = (*b)[n:]
	return true
}

// TrimLWS trims whitespace from beginning of the bytes.
func (b *Bytes) TrimLWS() {
	i := 0
	for i < len(*b) && ByteIsWS((*b)[i]) {
		i++
	}
	*b = (*b)[i:]
}

// TrimRWS trims whitespace from the end of the bytes.
func (b *Bytes) TrimRWS() {
	for len(*b) > 0 && ByteIsWS((*b)[len(*b)-1]) {
		*b = (*b)[:len(*b)-1]
	}
}

// Peek returns the first byte of b, or 0x00 when b is empty.
func (b *Bytes) Peek() byte {
	if len(*b) == 0 {
		return 0
	}
	return (*b)[0]
}

// Pop removes and returns the first byte of b, or 0x00 when b is empty.
func (b *Bytes) Pop() byte {
	if len(*b) == 0 {
		return 0
	}
	c := (*b)[0]
	*b = (*b)[1:]
	return c
}

// PopN removes and returns the first n bytes of b, or nil when fewer than
// n bytes remain.
func (b *Bytes) PopN(n int) []byte {
	if n > len(*b) {
		return nil
	}
	head := (*b)[:n]
	*b = (*b)[n:]
	return head
}

// PopUntil will advance b until, but not including, the first occurrence of
// any stopAt byte. If no occurrence is found, it advances to the end of b.
// The returned Bytes holds everything that was advanced over.
func (b *Bytes) PopUntil(stopAt ...byte) Bytes {
	if len(*b) == 0 {
		return Bytes{}
	}
	n := bytes.IndexAny(*b, string(stopAt))
	if n < 0 {
		n = len(*b)
	}
	head := (*b)[:n]
	*b = (*b)[n:]
	return head
}

// ReadSlice is the same as PopUntil, but the returned value includes stopAt
// as well.
func (b *Bytes) ReadSlice(stopAt byte) Bytes {
	if len(*b) == 0 {
		return Bytes{}
	}
	n := bytes.IndexByte(*b, stopAt)
	if n < 0 {
		n = len(*b)
	} else {
		n++
	}
	head := (*b)[:n]
	*b = (*b)[n:]
	return head
}

// Line returns the first line from b and advances b past it. One trailing
// "\r" is trimmed from the returned line, and the "\n" itself is consumed
// when present.
func (b *Bytes) Line() Bytes {
	line := b.PopUntil('\n')
	if l := len(line); l > 0 && line[l-1] == '\r' {
		line = line[:l-1]
	}
	b.Advance(1) // consume the newline, if any
	return line
}

// DropLastLine drops the last, possibly incomplete, line from b.
//
// mimetype limits itself to ReadLimit bytes when performing a detection.
// This means, for file formats like CSV or NDJSON, the last line of the
// input can be cut short. When b is shorter than readLimit, the whole file
// was read and nothing is dropped.
func (b *Bytes) DropLastLine(readLimit uint32) {
	if readLimit == 0 || uint32(len(*b)) < readLimit {
		return
	}
	// Truncate at the last newline. A newline at index 0 is ignored so
	// that b is never emptied completely.
	if i := bytes.LastIndexByte(*b, '\n'); i > 0 {
		*b = (*b)[:i]
	}
}

// Uint16 pops a little-endian uint16 from b, reporting false when fewer
// than two bytes remain.
func (b *Bytes) Uint16() (uint16, bool) {
	if len(*b) < 2 {
		return 0, false
	}
	v := binary.LittleEndian.Uint16(*b)
	*b = (*b)[2:]
	return v, true
}

// Flags accepted by Search and Match.
const (
	// CompactWS makes one whitespace byte in the pattern match a whole run
	// of whitespace in the input.
	CompactWS = 1 << iota
	// IgnoreCase makes an uppercase pattern byte match both cases in the
	// input.
	IgnoreCase
)

// Search returns the first index at which pattern p matches inside b, or
// -1 when there is no match. With flags == 0 it behaves like bytes.Index.
//
// Fix: previously an empty b with a non-empty p fell through the loop to
// "return 0", wrongly reporting a match at index 0; it now returns -1.
func (b Bytes) Search(p []byte, flags int) int {
	if flags == 0 {
		return bytes.Index(b, p)
	}
	// An empty pattern matches at index 0, mirroring bytes.Index.
	if len(p) == 0 {
		return 0
	}
	lb, lp := len(b), len(p)
	for i := range b {
		// Not enough input left for the pattern.
		if lb-i < lp {
			return -1
		}
		if b[i:].Match(p, flags) {
			return i
		}
	}
	// Reached only when b is empty: a non-empty pattern cannot match.
	return -1
}

// Match reports whether pattern p matches at index 0 of b, honoring the
// CompactWS and IgnoreCase flags. When b runs out before p is exhausted
// the match is considered successful — presumably because detection inputs
// can be truncated at the read limit; confirm before relying on it.
//
// Fix: with CompactWS set and a pattern ending in whitespace, the original
// read p[0] after p was emptied, panicking with index out of range; the
// access is now guarded.
func (b Bytes) Match(p []byte, flags int) bool {
	for len(b) > 0 {
		// The whole pattern was consumed.
		if len(p) == 0 {
			return true
		}
		if flags&IgnoreCase > 0 && isUpper(p[0]) {
			if upper(b[0]) != p[0] {
				return false
			}
			b, p = b[1:], p[1:]
		} else if flags&CompactWS > 0 && ByteIsWS(p[0]) {
			p = p[1:]
			if !ByteIsWS(b[0]) {
				return false
			}
			b = b[1:]
			// Collapse the rest of the whitespace run in b unless the
			// pattern asks for more whitespace. The len(p) guard avoids
			// an out-of-range panic for patterns ending in whitespace.
			if len(p) == 0 || !ByteIsWS(p[0]) {
				b.TrimLWS()
			}
		} else {
			if b[0] != p[0] {
				return false
			}
			b, p = b[1:], p[1:]
		}
	}
	return true
}

// isUpper reports whether c is an ASCII uppercase letter.
func isUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}

// upper converts an ASCII lowercase letter to uppercase; any other byte is
// returned unchanged.
func upper(c byte) byte {
	if c >= 'a' && c <= 'z' {
		return c - ('a' - 'A')
	}
	return c
}

// ByteIsWS reports whether b is an ASCII whitespace byte.
func ByteIsWS(b byte) bool {
	return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}

var (
	// ASCIISpaces lists the ASCII whitespace bytes recognized by ByteIsWS.
	ASCIISpaces = []byte{' ', '\r', '\n', '\x0c', '\t'}
	// ASCIIDigits lists the ASCII decimal digit bytes.
	ASCIIDigits = []byte{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
)

View File

@@ -103,15 +103,17 @@ func (m *MIME) match(in []byte, readLimit uint32) *MIME {
"text/html": charset.FromHTML,
"text/xml": charset.FromXML,
}
// ps holds optional MIME parameters.
ps := map[string]string{}
charset := ""
if f, ok := needsCharset[m.mime]; ok {
if cset := f(in); cset != "" {
ps["charset"] = cset
}
// The charset comes from BOM, from HTML headers, from XML headers.
// Limit the number of bytes searched for to 1024.
charset = f(in[:min(len(in), 1024)])
}
if m == root {
return m
}
return m.cloneHierarchy(ps)
return m.cloneHierarchy(charset)
}
// flatten transforms an hierarchy of MIMEs into a slice of MIMEs.
@@ -125,10 +127,10 @@ func (m *MIME) flatten() []*MIME {
}
// clone creates a new MIME with the provided optional MIME parameters.
func (m *MIME) clone(ps map[string]string) *MIME {
func (m *MIME) clone(charset string) *MIME {
clonedMIME := m.mime
if len(ps) > 0 {
clonedMIME = mime.FormatMediaType(m.mime, ps)
if charset != "" {
clonedMIME = m.mime + "; charset=" + charset
}
return &MIME{
@@ -140,11 +142,11 @@ func (m *MIME) clone(ps map[string]string) *MIME {
// cloneHierarchy creates a clone of m and all its ancestors. The optional MIME
// parameters are set on the last child of the hierarchy.
func (m *MIME) cloneHierarchy(ps map[string]string) *MIME {
ret := m.clone(ps)
func (m *MIME) cloneHierarchy(charset string) *MIME {
ret := m.clone(charset)
lastChild := ret
for p := m.Parent(); p != nil; p = p.Parent() {
pClone := p.clone(nil)
pClone := p.clone("")
lastChild.parent = pClone
lastChild = pClone
}

View File

@@ -1,4 +1,4 @@
## 178 Supported MIME types
## 191 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.
Extension | MIME type | Aliases
@@ -7,12 +7,12 @@ Extension | MIME type | Aliases
**.xpm** | image/x-xpixmap | -
**.7z** | application/x-7z-compressed | -
**.zip** | application/zip | application/x-zip, application/x-zip-compressed
**.xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | -
**.docx** | application/vnd.openxmlformats-officedocument.wordprocessingml.document | -
**.pptx** | application/vnd.openxmlformats-officedocument.presentationml.presentation | -
**.xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | -
**.epub** | application/epub+zip | -
**.apk** | application/vnd.android.package-archive | -
**.jar** | application/jar | -
**.jar** | application/java-archive | application/jar, application/jar-archive, application/x-java-archive
**.odt** | application/vnd.oasis.opendocument.text | application/x-vnd.oasis.opendocument.text
**.ott** | application/vnd.oasis.opendocument.text-template | application/x-vnd.oasis.opendocument.text-template
**.ods** | application/vnd.oasis.opendocument.spreadsheet | application/x-vnd.oasis.opendocument.spreadsheet
@@ -24,6 +24,8 @@ Extension | MIME type | Aliases
**.odf** | application/vnd.oasis.opendocument.formula | application/x-vnd.oasis.opendocument.formula
**.odc** | application/vnd.oasis.opendocument.chart | application/x-vnd.oasis.opendocument.chart
**.sxc** | application/vnd.sun.xml.calc | -
**.kmz** | application/vnd.google-earth.kmz | -
**.vsdx** | application/vnd.ms-visio.drawing.main+xml | -
**.pdf** | application/pdf | application/x-pdf
**.fdf** | application/vnd.fdf | -
**n/a** | application/x-ole-storage | -
@@ -61,9 +63,10 @@ Extension | MIME type | Aliases
**.tar** | application/x-tar | -
**.xar** | application/x-xar | -
**.bz2** | application/x-bzip2 | -
**.fits** | application/fits | -
**.fits** | application/fits | image/fits
**.tiff** | image/tiff | -
**.bmp** | image/bmp | image/x-bmp, image/x-ms-bmp
**.123** | application/vnd.lotus-1-2-3 | -
**.ico** | image/x-icon | -
**.mp3** | audio/mpeg | audio/x-mpeg, audio/mp3
**.flac** | audio/flac | -
@@ -146,9 +149,11 @@ Extension | MIME type | Aliases
**.cab** | application/x-installshield | -
**.jxr** | image/jxr | image/vnd.ms-photo
**.parquet** | application/vnd.apache.parquet | application/x-parquet
**.one** | application/onenote | -
**.chm** | application/vnd.ms-htmlhelp | -
**.txt** | text/plain | -
**.html** | text/html | -
**.svg** | image/svg+xml | -
**.html** | text/html | -
**.xml** | text/xml | application/xml
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
@@ -163,14 +168,17 @@ Extension | MIME type | Aliases
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.xfdf** | application/vnd.adobe.xfdf | -
**.owl** | application/owl+xml | -
**.html** | application/xhtml+xml | -
**.php** | text/x-php | -
**.js** | text/javascript | application/x-javascript, application/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
**.py** | text/x-python | text/x-script.python, application/x-python
**.rb** | text/x-ruby | application/x-ruby
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.har** | application/json | -
**.gltf** | model/gltf+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | application/rtf
**.srt** | application/x-subrip | application/x-srt, text/x-srt
@@ -181,3 +189,8 @@ Extension | MIME type | Aliases
**.ics** | text/calendar | -
**.warc** | application/warc | -
**.vtt** | text/vtt | -
**.sh** | text/x-shellscript | text/x-sh, application/x-shellscript, application/x-sh
**.pbm** | image/x-portable-bitmap | -
**.pgm** | image/x-portable-graymap | -
**.ppm** | image/x-portable-pixmap | -
**.pam** | image/x-portable-arbitrarymap | -

View File

@@ -18,12 +18,13 @@ import (
var root = newMIME("application/octet-stream", "",
func([]byte, uint32) bool { return true },
xpm, sevenZ, zip, pdf, fdf, ole, ps, psd, p7s, ogg, png, jpg, jxl, jp2, jpx,
jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3,
flac, midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mp4, webM,
jpm, jxs, gif, webp, exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, lotus, ico,
mp3, flac, midi, ape, musePack, amr, wav, aiff, au, mpeg, quickTime, mp4, webM,
avi, flv, mkv, asf, aac, voc, m3u, rmvb, gzip, class, swf, crx, ttf, woff,
woff2, otf, ttc, eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, cbor,
sqlite3, dwg, nes, lnk, macho, qcp, icns, hdr, mrc, mdb, accdb, zstd, cab,
rpm, xz, lzip, torrent, cpio, tzif, xcf, pat, gbr, glb, cabIS, jxr, parquet,
oneNote, chm,
// Keep text last because it is the slowest check.
text,
)
@@ -48,22 +49,24 @@ var (
// This means APK should be a child of JAR detector, but in practice,
// the decisive signature for JAR might be located at the end of the file
// and not reachable because of library readLimit.
zip = newMIME("application/zip", ".zip", magic.Zip, xlsx, docx, pptx, epub, apk, jar, odt, ods, odp, odg, odf, odc, sxc).
zip = newMIME("application/zip", ".zip", magic.Zip, docx, pptx, xlsx, epub, apk, jar, odt, ods, odp, odg, odf, odc, sxc, kmz, visio).
alias("application/x-zip", "application/x-zip-compressed")
tar = newMIME("application/x-tar", ".tar", magic.Tar)
xar = newMIME("application/x-xar", ".xar", magic.Xar)
bz2 = newMIME("application/x-bzip2", ".bz2", magic.Bz2)
pdf = newMIME("application/pdf", ".pdf", magic.Pdf).
pdf = newMIME("application/pdf", ".pdf", magic.PDF).
alias("application/x-pdf")
fdf = newMIME("application/vnd.fdf", ".fdf", magic.Fdf)
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", magic.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", magic.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", magic.Pptx)
epub = newMIME("application/epub+zip", ".epub", magic.Epub)
jar = newMIME("application/jar", ".jar", magic.Jar)
apk = newMIME("application/vnd.android.package-archive", ".apk", magic.APK)
ole = newMIME("application/x-ole-storage", "", magic.Ole, msi, aaf, msg, xls, pub, ppt, doc)
msi = newMIME("application/x-ms-installer", ".msi", magic.Msi).
fdf = newMIME("application/vnd.fdf", ".fdf", magic.Fdf)
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", magic.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", magic.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", magic.Pptx)
visio = newMIME("application/vnd.ms-visio.drawing.main+xml", ".vsdx", magic.Visio)
epub = newMIME("application/epub+zip", ".epub", magic.Epub)
jar = newMIME("application/java-archive", ".jar", magic.Jar).
alias("application/jar", "application/jar-archive", "application/x-java-archive")
apk = newMIME("application/vnd.android.package-archive", ".apk", magic.APK)
ole = newMIME("application/x-ole-storage", "", magic.Ole, msi, aaf, msg, xls, pub, ppt, doc)
msi = newMIME("application/x-ms-installer", ".msi", magic.Msi).
alias("application/x-windows-installer", "application/x-msi")
aaf = newMIME("application/octet-stream", ".aaf", magic.Aaf)
doc = newMIME("application/msword", ".doc", magic.Doc).
@@ -75,18 +78,19 @@ var (
alias("application/msexcel")
msg = newMIME("application/vnd.ms-outlook", ".msg", magic.Msg)
ps = newMIME("application/postscript", ".ps", magic.Ps)
fits = newMIME("application/fits", ".fits", magic.Fits)
fits = newMIME("application/fits", ".fits", magic.Fits).alias("image/fits")
ogg = newMIME("application/ogg", ".ogg", magic.Ogg, oggAudio, oggVideo).
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2).
text = newMIME("text/plain", ".txt", magic.Text, svg, html, xml, php, js, lua, perl, python, ruby, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt, shell, netpbm, netpgm, netppm, netpam)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2, xhtml).
alias("application/xml")
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
xhtml = newMIME("application/xhtml+xml", ".html", magic.XHTML)
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har, gltf)
har = newMIME("application/json", ".har", magic.HAR)
csv = newMIME("text/csv", ".csv", magic.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.Tsv)
csv = newMIME("text/csv", ".csv", magic.CSV)
tsv = newMIME("text/tab-separated-values", ".tsv", magic.TSV)
geoJSON = newMIME("application/geo+json", ".geojson", magic.GeoJSON)
ndJSON = newMIME("application/x-ndjson", ".ndjson", magic.NdJSON)
html = newMIME("text/html", ".html", magic.HTML)
@@ -101,6 +105,10 @@ var (
perl = newMIME("text/x-perl", ".pl", magic.Perl)
python = newMIME("text/x-python", ".py", magic.Python).
alias("text/x-script.python", "application/x-python")
ruby = newMIME("text/x-ruby", ".rb", magic.Ruby).
alias("application/x-ruby")
shell = newMIME("text/x-shellscript", ".sh", magic.Shell).
alias("text/x-sh", "application/x-shellscript", "application/x-sh")
tcl = newMIME("text/x-tcl", ".tcl", magic.Tcl).
alias("application/x-tcl")
vCard = newMIME("text/vcard", ".vcf", magic.VCard)
@@ -112,6 +120,7 @@ var (
atom = newMIME("application/atom+xml", ".atom", magic.Atom)
x3d = newMIME("model/x3d+xml", ".x3d", magic.X3d)
kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", magic.Kml)
kmz = newMIME("application/vnd.google-earth.kmz", ".kmz", magic.KMZ)
xliff = newMIME("application/x-xliff+xml", ".xlf", magic.Xliff)
collada = newMIME("model/vnd.collada+xml", ".dae", magic.Collada)
gml = newMIME("application/gml+xml", ".gml", magic.Gml)
@@ -135,9 +144,12 @@ var (
tiff = newMIME("image/tiff", ".tiff", magic.Tiff)
bmp = newMIME("image/bmp", ".bmp", magic.Bmp).
alias("image/x-bmp", "image/x-ms-bmp")
ico = newMIME("image/x-icon", ".ico", magic.Ico)
icns = newMIME("image/x-icns", ".icns", magic.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", magic.Psd).
// lotus check must be done before ico because some ico detection is a bit
// relaxed and some lotus files are wrongfully identified as ico otherwise.
lotus = newMIME("application/vnd.lotus-1-2-3", ".123", magic.Lotus123)
ico = newMIME("image/x-icon", ".ico", magic.Ico)
icns = newMIME("image/x-icns", ".icns", magic.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", magic.Psd).
alias("image/x-psd", "application/photoshop")
heic = newMIME("image/heic", ".heic", magic.Heic)
heicSeq = newMIME("image/heic-sequence", ".heic", magic.HeicSequence)
@@ -262,9 +274,16 @@ var (
pat = newMIME("image/x-gimp-pat", ".pat", magic.Pat)
gbr = newMIME("image/x-gimp-gbr", ".gbr", magic.Gbr)
xfdf = newMIME("application/vnd.adobe.xfdf", ".xfdf", magic.Xfdf)
glb = newMIME("model/gltf-binary", ".glb", magic.Glb)
glb = newMIME("model/gltf-binary", ".glb", magic.GLB)
gltf = newMIME("model/gltf+json", ".gltf", magic.GLTF)
jxr = newMIME("image/jxr", ".jxr", magic.Jxr).alias("image/vnd.ms-photo")
parquet = newMIME("application/vnd.apache.parquet", ".parquet", magic.Par1).
alias("application/x-parquet")
cbor = newMIME("application/cbor", ".cbor", magic.CBOR)
netpbm = newMIME("image/x-portable-bitmap", ".pbm", magic.NetPBM)
netpgm = newMIME("image/x-portable-graymap", ".pgm", magic.NetPGM)
netppm = newMIME("image/x-portable-pixmap", ".ppm", magic.NetPPM)
netpam = newMIME("image/x-portable-arbitrarymap", ".pam", magic.NetPAM)
cbor = newMIME("application/cbor", ".cbor", magic.CBOR)
oneNote = newMIME("application/onenote", ".one", magic.One)
chm = newMIME("application/vnd.ms-htmlhelp", ".chm", magic.CHM)
)

View File

@@ -106,8 +106,9 @@ validate := validator.New(validator.WithRequiredStructEnabled())
| datauri | Data URL |
| fqdn | Full Qualified Domain Name (FQDN) |
| hostname | Hostname RFC 952 |
| hostname_port | HostPort |
| hostname_rfc1123 | Hostname RFC 1123 |
| hostname_port | HostPort |
| port | Port number |
| ip | Internet Protocol Address IP |
| ip4_addr | Internet Protocol Address IPv4 |
| ip6_addr | Internet Protocol Address IPv6 |
@@ -124,7 +125,8 @@ validate := validator.New(validator.WithRequiredStructEnabled())
| unix_addr | Unix domain socket end point Address |
| uri | URI String |
| url | URL String |
| http_url | HTTP URL String |
| http_url | HTTP(s) URL String |
| https_url | HTTPS-only URL String |
| url_encoded | URL Encoded |
| urn_rfc2141 | Urn RFC 2141 String |
@@ -133,6 +135,7 @@ validate := validator.New(validator.WithRequiredStructEnabled())
| Tag | Description |
| - | - |
| alpha | Alpha Only |
| alphaspace | Alpha Space |
| alphanum | Alphanumeric |
| alphanumunicode | Alphanumeric Unicode |
| alphaunicode | Alpha Unicode |

View File

@@ -118,6 +118,7 @@ var (
"fieldcontains": fieldContains,
"fieldexcludes": fieldExcludes,
"alpha": isAlpha,
"alphaspace": isAlphaSpace,
"alphanum": isAlphanum,
"alphaunicode": isAlphaUnicode,
"alphanumunicode": isAlphanumUnicode,
@@ -134,6 +135,7 @@ var (
"email": isEmail,
"url": isURL,
"http_url": isHttpURL,
"https_url": isHttpsURL,
"uri": isURI,
"urn_rfc2141": isUrnRFC2141, // RFC 2141
"file": isFile,
@@ -1513,6 +1515,29 @@ func isHttpURL(fl FieldLevel) bool {
panic(fmt.Sprintf("Bad field type %s", field.Type()))
}
// isHttpsURL is the validation function for validating if the current
// field's value is a valid HTTPS-only URL.
func isHttpsURL(fl FieldLevel) bool {
	if !isURL(fl) {
		return false
	}
	field := fl.Field()
	if field.Kind() != reflect.String {
		panic(fmt.Sprintf("Bad field type %s", field.Type()))
	}
	u, err := url.Parse(strings.ToLower(field.String()))
	if err != nil || u.Host == "" {
		return false
	}
	return u.Scheme == "https"
}
// isUrnRFC2141 is the validation function for validating if the current field's value is a valid URN as per RFC 2141.
func isUrnRFC2141(fl FieldLevel) bool {
field := fl.Field()
@@ -1743,6 +1768,11 @@ func isAlphanumUnicode(fl FieldLevel) bool {
return alphaUnicodeNumericRegex().MatchString(fl.Field().String())
}
// isAlphaSpace is the validation function for validating if the current
// field's value is a valid alpha value with spaces.
func isAlphaSpace(fl FieldLevel) bool {
	value := fl.Field().String()
	return alphaSpaceRegex().MatchString(value)
}
// isAlphaUnicode is the validation function for validating if the current field's value is a valid alpha unicode value.
func isAlphaUnicode(fl FieldLevel) bool {
return alphaUnicodeRegex().MatchString(fl.Field().String())
@@ -1872,6 +1902,15 @@ func requiredIf(fl FieldLevel) bool {
if len(params)%2 != 0 {
panic(fmt.Sprintf("Bad param number for required_if %s", fl.FieldName()))
}
seen := make(map[string]struct{})
for i := 0; i < len(params); i += 2 {
if _, ok := seen[params[i]]; ok {
panic(fmt.Sprintf("Duplicate param %s for required_if %s", params[i], fl.FieldName()))
}
seen[params[i]] = struct{}{}
}
for i := 0; i < len(params); i += 2 {
if !requireCheckFieldValue(fl, params[i], params[i+1], false) {
return true

View File

@@ -264,6 +264,7 @@ The field under validation must be present and not empty only if all
the other specified fields are equal to the value following the specified
field. For strings ensures value is not "". For slices, maps, pointers,
interfaces, channels and functions ensures the value is not nil. For structs ensures value is not the zero value.
Using the same field name multiple times in the parameters will result in a panic at runtime.
Usage: required_if
@@ -776,6 +777,12 @@ This validates that a string value contains ASCII alpha characters only
Usage: alpha
# Alpha Space
This validates that a string value contains ASCII alpha characters and spaces only
Usage: alphaspace
# Alphanumeric
This validates that a string value contains ASCII alphanumeric characters only
@@ -1330,6 +1337,12 @@ can be used to validate fields typically passed to sockets and connections.
Usage: hostname_port
# Port
This validates that the value falls within the valid port number range of 1 to 65,535.
Usage: port
# Datetime
This validates that a string value is a valid datetime based on the supplied datetime format.

View File

@@ -7,6 +7,7 @@ import (
const (
alphaRegexString = "^[a-zA-Z]+$"
alphaSpaceRegexString = "^[a-zA-Z ]+$"
alphaNumericRegexString = "^[a-zA-Z0-9]+$"
alphaUnicodeRegexString = "^[\\p{L}]+$"
alphaUnicodeNumericRegexString = "^[\\p{L}\\p{N}]+$"
@@ -93,6 +94,7 @@ func lazyRegexCompile(str string) func() *regexp.Regexp {
var (
alphaRegex = lazyRegexCompile(alphaRegexString)
alphaSpaceRegex = lazyRegexCompile(alphaSpaceRegexString)
alphaNumericRegex = lazyRegexCompile(alphaNumericRegexString)
alphaUnicodeRegex = lazyRegexCompile(alphaUnicodeRegexString)
alphaUnicodeNumericRegex = lazyRegexCompile(alphaUnicodeNumericRegexString)

View File

@@ -181,7 +181,7 @@ func (v Validate) ValidateMapCtx(ctx context.Context, data map[string]interface{
errs[field] = errors.New("The field: '" + field + "' is not a map to dive")
}
} else if ruleStr, ok := rule.(string); ok {
err := v.VarCtx(ctx, data[field], ruleStr)
err := v.VarWithKeyCtx(ctx, field, data[field], ruleStr)
if err != nil {
errs[field] = err
}
@@ -681,6 +681,64 @@ func (v *Validate) VarWithValueCtx(ctx context.Context, field interface{}, other
return
}
// VarWithKey validates a single variable using tag style validation, attaching
// the supplied key to any returned error so callers can identify the value.
//
// eg.
//
//	var s string
//	validate.VarWithKey("email_address", s, "required,email")
//
// WARNING: a struct can be passed for validation eg. time.Time is a struct or
// if you have a custom type and have registered a custom type handler, so must
// allow it; however unforeseen validations will occur if trying to validate a
// struct that is meant to be passed to 'validate.Struct'
//
// It returns InvalidValidationError for bad values passed in and nil or
// ValidationErrors as error otherwise. You will need to assert the error if
// it's not nil eg. err.(validator.ValidationErrors) to access the array of
// errors. Array, Slice and map fields may contain more than one error.
func (v *Validate) VarWithKey(key string, field interface{}, tag string) error {
	// Delegate to the context-aware variant with a background context.
	ctx := context.Background()
	return v.VarWithKeyCtx(ctx, key, field, tag)
}
// VarWithKeyCtx validates a single variable using tag style validation,
// attaching the supplied key to any returned error, and allows passing of
// contextual validation information via context.Context.
//
// eg.
//
//	var s string
//	validate.VarWithKeyCtx(ctx, "email_address", s, "required,email")
//
// WARNING: a struct can be passed for validation eg. time.Time is a struct or
// if you have a custom type and have registered a custom type handler, so must
// allow it; however unforeseen validations will occur if trying to validate a
// struct that is meant to be passed to 'validate.Struct'
//
// It returns InvalidValidationError for bad values passed in and nil or
// ValidationErrors as error otherwise. You will need to assert the error if
// it's not nil eg. err.(validator.ValidationErrors) to access the array of
// errors. Array, Slice and map fields may contain more than one error.
func (v *Validate) VarWithKeyCtx(ctx context.Context, key string, field interface{}, tag string) (err error) {
	// An empty tag or an explicit skip tag means there is nothing to validate.
	if len(tag) == 0 || tag == skipValidationTag {
		return nil
	}

	// Synthetic field descriptor so reported errors carry the caller-supplied
	// key rather than an empty field name.
	namedField := &cField{
		name:       key,
		altName:    key,
		namesEqual: true,
	}

	cachedTag := v.fetchCacheTag(tag)
	rv := reflect.ValueOf(field)

	// Borrow a validate runner from the pool, traverse the single value, and
	// return the runner for reuse once any errors have been collected.
	runner := v.pool.Get().(*validate)
	runner.top = rv
	runner.isPartial = false
	runner.traverseField(ctx, rv, rv, runner.ns[0:0], runner.actualNs[0:0], namedField, cachedTag)

	if len(runner.errs) > 0 {
		err = runner.errs
		runner.errs = nil
	}
	v.pool.Put(runner)

	return
}
func (v *Validate) registerValidation(tag string, fn FuncCtx, bakedIn bool, nilCheckable bool) error {
if len(tag) == 0 {
return errors.New("function Key cannot be empty")

11
vendor/modules.txt vendored
View File

@@ -161,12 +161,15 @@ github.com/felixge/httpsnoop
# github.com/fxamacker/cbor/v2 v2.7.0
## explicit; go 1.17
github.com/fxamacker/cbor/v2
# github.com/gabriel-vasile/mimetype v1.4.8
## explicit; go 1.20
# github.com/gabriel-vasile/mimetype v1.4.10
## explicit; go 1.21
github.com/gabriel-vasile/mimetype
github.com/gabriel-vasile/mimetype/internal/charset
github.com/gabriel-vasile/mimetype/internal/csv
github.com/gabriel-vasile/mimetype/internal/json
github.com/gabriel-vasile/mimetype/internal/magic
github.com/gabriel-vasile/mimetype/internal/markup
github.com/gabriel-vasile/mimetype/internal/scan
# github.com/go-gomail/gomail v0.0.0-20160411212932-81ebce5c23df
## explicit
github.com/go-gomail/gomail
@@ -194,8 +197,8 @@ github.com/go-playground/locales/currency
# github.com/go-playground/universal-translator v0.18.1
## explicit; go 1.18
github.com/go-playground/universal-translator
# github.com/go-playground/validator/v10 v10.27.0
## explicit; go 1.20
# github.com/go-playground/validator/v10 v10.28.0
## explicit; go 1.24.0
github.com/go-playground/validator/v10
# github.com/gogo/protobuf v1.3.2
## explicit; go 1.15