blob: ce5045364754adb2f1bdb9b505f6b0e97bf4ae8f [file] [log] [blame]
// Copyright 2016 The Upspin Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package user provides tools for parsing and validating user names.
package user // import "upspin.io/user"
import (
"strings"
"golang.org/x/text/secure/precis"
"upspin.io/errors"
"upspin.io/upspin"
)
// Parse splits an upspin.UserName into user and domain and returns the pair.
// It also returns the "+" suffix part of the user name, if it has one. For example,
// given the user name
//	ann+backup@example.com
// it would return the strings
//	"ann+backup" "backup" "example.com"
//
// Parse validates the name as an e-mail address and lower-cases the domain
// so it is canonical.
//
// The rules are:
//
//	<name> := <user name>@<domain name>
//
// <domain name> :=
//
//	- each . separated token < 64 characters
//	- character set for tokens [a-z0-9\-]
//	- final token at least two characters
//	- whole name < 254 characters
//	- characters are case insensitive
//	- final period is OK, but we remove it
//
// We ignore the rules of punycode, which is defined in https://tools.ietf.org/html/rfc3490 .
//
// <user name> :=
//
// Names are validated and canonicalized by the UsernameCasePreserved profile
// of RFC 7613, "Preparation, Enforcement, and Comparison of Internationalized Strings",
// also known as PRECIS.
//
// Further restrictions are added here. The only ASCII punctuation characters
// that are legal are "!#$%&'*+-./=?^_{|}~", and a name that is only ASCII punctuation
// is rejected.
//
// As a special case for use in Access and Group files, the name "*" is allowed.
//
// Case is significant and spaces are not allowed.
//
// The username suffix is tightly constrained: It uses the same character
// set as domains, but of course the spacing of periods is irrelevant.
//
// Facebook and Google constrain usernames to [a-zA-Z0-9+-.],
// ignoring the period and, in Google only, ignoring everything
// from a plus sign onwards. We accept a superset of this but do not
// follow the "ignore" rules.
//
// On any failure the three strings are empty and err is non-nil.
func Parse(userName upspin.UserName) (user, suffix, domain string, err error) {
	const op = errors.Op("user.Parse")

	if len(userName) >= 254 {
		return "", "", "", errors.E(op, errors.Invalid, userName, "name too long")
	}

	// Exactly one '@' is required: find the first and make sure no
	// other follows it.
	name := string(userName)
	at := strings.IndexByte(name, '@')
	if at < 0 || strings.IndexByte(name[at+1:], '@') >= 0 {
		return "", "", "", errors.E(op, errors.Invalid, userName, errors.Str("user name must contain one @ symbol"))
	}
	user, domain = name[:at], name[at+1:]

	// The wildcard user "*" is an important special case: it is passed
	// through untouched, with no suffix.
	if user != "*" {
		if user, suffix, err = parseUser(op, userName, user); err != nil {
			return "", "", "", err
		}
	}

	if domain, err = parseDomain(op, userName, domain); err != nil {
		return "", "", "", err
	}
	return user, suffix, domain, nil
}
// ParseUser parses the component of a user name before the '@', that is, the
// user component of an email address. The rules are defined in the
// documentation for Parse except that "*" is not a valid user and the user name
// itself must be less than 255 bytes long.
func ParseUser(user string) (userName, suffix string, err error) {
return parseUser(errors.Op("user.ParseUser"), upspin.UserName(user), user)
}
// parseUser is the implementation of ParseUser, also called by Parse.
// It takes the full UserName as well as the user component, to aid in error reporting.
//
// On success it returns the canonicalized user component (including any
// +suffix) and the suffix on its own. It returns an errors.Invalid error if
// the user component is empty, is 255 bytes or longer, fails
// canonicalization, or has a malformed +suffix.
func parseUser(op errors.Op, userName upspin.UserName, user string) (string, string, error) {
	if len(user) >= 255 {
		return errParseUser(op, userName, "user name too long")
	}
	if user == "" {
		return errParseUser(op, userName, "missing user name")
	}
	// Check first because PRECIS dislikes + at end of string.
	if strings.IndexByte(user, '+') == len(user)-1 {
		return errParseUser(op, userName, "empty +suffix in user name")
	}

	// Validate and canonicalize the user name - and maybe suffix, but
	// the suffix is checked more thoroughly below. We include the suffix
	// here because PRECIS will prevent things like "+" or "ann+" or
	// "+ann" as the full name. That is, we do PRECIS validation on
	// the full user+suffix.
	canon, err := canonicalize(user)
	if err != nil {
		// Report the full name that was being parsed; the canonicalized
		// result is not meaningful on failure.
		return "", "", errors.E(op, errors.Invalid, userName, err)
	}

	// Locate the + in the canonical form rather than in the input:
	// canonicalization may change the byte length of the text in front of
	// it, so an offset computed on the input cannot be used to slice canon.
	plus := strings.IndexByte(canon, '+')
	if plus < 0 {
		return canon, "", nil
	}
	if plus == 0 {
		return errParseUser(op, userName, "user name cannot start with +suffix")
	}

	suffix := canon[plus+1:]
	if suffix == "" {
		// Only reachable if canonicalization itself left a + at the end.
		return errParseUser(op, userName, "empty +suffix in user name")
	}
	// IndexByte reports -1 when absent, so index 0 ("ann++x") is a hit too.
	if strings.IndexByte(suffix, '+') >= 0 {
		return errParseUser(op, userName, "multiple +suffixes in user name")
	}
	for _, c := range suffix {
		if !okDomainChar(c) {
			return errParseUser(op, userName, "bad symbol in +suffix")
		}
	}
	return canon, suffix, nil
}
// ParseDomain parses the component of a user name after the '@', that is, the
// domain component of an email address. The rules are defined in the
// documentation for Parse except the domain name itself must be less than 255
// bytes long.
func ParseDomain(domain string) (string, error) {
return parseDomain(errors.Op("user.ParseDomain"), upspin.UserName(domain), domain)
}
// parseDomain is the implementation of ParseDomain, also called by Parse.
// It takes the full UserName as well as the domain component, to aid in error reporting.
//
// On success it returns the domain with one trailing period (if present)
// removed and any upper-case letters lower-cased. It returns an
// errors.Invalid error if the domain is 255 bytes or longer, is empty,
// contains no period, contains a character rejected by okDomainChar, has an
// empty element or one of 64 bytes or more, or has a final element shorter
// than two bytes.
func parseDomain(op errors.Op, userName upspin.UserName, domain string) (string, error) {
	// Longest permitted length, in bytes, of one period-separated element.
	const maxElementLen = 63

	if len(domain) >= 255 {
		return errParseDomain(op, userName, "domain name too long")
	}
	// Final period in domain is legal but is dropped.
	domain = strings.TrimSuffix(domain, ".")
	if domain == "" {
		return errParseDomain(op, userName, "missing domain name")
	}
	if !strings.Contains(domain, ".") {
		return errParseDomain(op, userName, "domain name must contain a period")
	}

	// Valid domain name? Every accepted character is a single byte, so byte
	// offsets can be used to measure element lengths.
	period := -1 // Offset of the previous period; -1 makes a leading period an empty element.
	isUpper := false
	for i, c := range domain {
		if !okDomainChar(c) {
			return errParseDomain(op, userName, "bad symbol in domain name")
		}
		if c == '.' {
			// The element just completed is domain[period+1:i].
			if n := i - period - 1; n == 0 || n > maxElementLen {
				return errParseDomain(op, userName, "invalid domain name element")
			}
			period = i
		}
		if 'A' <= c && c <= 'Z' {
			isUpper = true
		}
	}

	// The loop only measures elements that are followed by a period, so the
	// final element is checked here.
	last := len(domain) - period - 1
	// Last domain element must be at least two bytes (".co")
	if last < 2 {
		return errParseDomain(op, userName, "invalid domain name")
	}
	if last > maxElementLen {
		return errParseDomain(op, userName, "invalid domain name element")
	}

	// Lower-case the domain name if necessary.
	if isUpper {
		domain = strings.ToLower(domain)
	}
	return domain, nil
}
// errParseUser builds the failure result for parseUser: two empty strings
// and an errors.Invalid error that records op, the name being parsed, and msg.
func errParseUser(op errors.Op, userName upspin.UserName, msg string) (u, s string, err error) {
	err = errors.E(op, errors.Invalid, userName, msg)
	return "", "", err
}
// errParseDomain builds the failure result for parseDomain: an empty string
// and an errors.Invalid error that records op, the name being parsed, and msg.
func errParseDomain(op errors.Op, userName upspin.UserName, msg string) (d string, err error) {
	err = errors.E(op, errors.Invalid, userName, msg)
	return "", err
}
// canonicalize validates the user component of a name (the part before the
// '@', including any +suffix) and returns its canonical form.
//
// It fails if the text contains a character rejected by
// illegalASCIIPunctuation, or if every character is accepted by
// legalASCIIPunctuation. Text made up solely of ASCII letters and digits is
// returned unchanged; anything else is handed to the PRECIS
// UsernameCasePreserved profile, whose result and error are returned as is.
func canonicalize(user string) (string, error) {
	// PRECIS allows any ASCII character, but we are more restrictive.
	// That's OK because the ASCII check is cheap and almost always
	// sufficient.
	var (
		sawNonPunct bool // some rune is not legal ASCII punctuation
		needPRECIS  bool // some rune is not a plain ASCII letter or digit
	)
	for _, r := range user {
		if illegalASCIIPunctuation(r) {
			return "", errors.Errorf("illegal character %q", r)
		}
		sawNonPunct = sawNonPunct || !legalASCIIPunctuation(r)
		needPRECIS = needPRECIS || !simpleUserNameChar(r)
	}
	if !sawNonPunct {
		return "", errors.Errorf("user name contains only punctuation")
	}
	if needPRECIS {
		return precis.UsernameCasePreserved.String(user)
	}
	return user, nil
}
// simpleUserNameChar reports whether r is an ASCII letter or digit.
// canonicalize uses it to spot strings that don't need PRECIS processing.
// Note we don't check punctuation here because identifiers allow punctuation but
// only in certain places; let PRECIS do the work. "*" is the exception.
func simpleUserNameChar(r rune) bool {
	lower := 'a' <= r && r <= 'z'
	upper := 'A' <= r && r <= 'Z'
	digit := '0' <= r && r <= '9'
	return lower || upper || digit
}
// illegalASCIIPunctuation reports whether r is one of the ASCII punctuation
// characters (space included) that PRECIS would allow but that we reject
// within a user name. '@' is in the set because this is applied to the user
// part only, never to the domain name.
func illegalASCIIPunctuation(r rune) bool {
	const banned = " @\"(),:;<>[\\]`"
	return strings.IndexRune(banned, r) >= 0
}
// legalASCIIPunctuation reports whether r is one of the ASCII punctuation
// characters that we accept within a user name.
func legalASCIIPunctuation(r rune) bool {
	const allowed = "!#.$%&'*+-/=?^_{|}~"
	return strings.IndexRune(allowed, r) >= 0
}
// okDomainChar reports whether r may appear in a domain name or in the
// +suffix of a user name: an ASCII letter, an ASCII digit, '-' or '.'.
// See the comments for Parse. '+' is not accepted, because the character
// set documented there for domain tokens is letters, digits and '-'.
func okDomainChar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '-', r == '.':
		return true
	}
	return false
}
// Clean returns the user name in canonical form as described by
// the comments for the Parse function. If the name does not parse,
// it returns an empty name and the error from Parse.
func Clean(userName upspin.UserName) (upspin.UserName, error) {
	user, _, domain, err := Parse(userName)
	if err != nil {
		return "", err
	}

	// Do we need to rebuild? Avoid allocation if we can: when both parsed
	// halves match the input, hand the input back as is.
	// Parse succeeded, so the name contains an '@'.
	raw := string(userName)
	at := strings.IndexByte(raw, '@')
	changed := raw[:at] != user || raw[at+1:] != domain
	if changed {
		return upspin.UserName(user + "@" + domain), nil
	}
	return userName, nil
}