gtsocial-umbx

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit dd83ad053c0cde5b948cbfe34ec4864cf0a123e3
parent 938328cd077d40b75e0834d56ff8d43ad035fd2b
Author: tobi <31960611+tsmethurst@users.noreply.github.com>
Date:   Thu, 29 Sep 2022 12:03:17 +0200

[feature] Add `meta robots` tag; allow robots to index profile card if user is Discoverable (#842)

* rework robots.txt response

* don't let robots generate snippets from statuses/threads

* allow robots to index if user is Discoverable

* add license text
Diffstat:
M internal/api/security/robots.go | 46 +++++++++++++++++++++++++++++++++++++++++++---
M internal/web/profile.go | 7 +++++++
A internal/web/robots.go | 24 ++++++++++++++++++++++++
M web/template/header.tmpl | 1 +
M web/template/profile.tmpl | 2 +-
M web/template/status.tmpl | 6 +++---
M web/template/thread.tmpl | 2 +-
7 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/internal/api/security/robots.go b/internal/api/security/robots.go @@ -1,3 +1,21 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + package security import ( @@ -7,11 +25,33 @@ import ( ) const robotsString = `User-agent: * -Disallow: / +Crawl-delay: 500 +# api stuff +Disallow: /api/ +# auth/login stuff +Disallow: /auth/ +Disallow: /oauth/ +Disallow: /check_your_email +Disallow: /wait_for_approval +Disallow: /account_disabled +# well known stuff +Disallow: /.well-known/ +# files +Disallow: /fileserver/ +# s2s AP stuff +Disallow: /users/ +Disallow: /emoji/ +# panels +Disallow: /admin +Disallow: /user +Disallow: /settings/ ` -// RobotsGETHandler returns the most restrictive possible robots.txt file in response to a call to /robots.txt. -// The response instructs bots with *any* user agent not to index the instance at all. +// RobotsGETHandler returns a decent robots.txt that prevents crawling +// the api, auth pages, settings pages, etc. +// +// More granular robots meta tags are then applied for web pages +// depending on user preferences (see internal/web). 
func (m *Module) RobotsGETHandler(c *gin.Context) { c.String(http.StatusOK, robotsString) } diff --git a/internal/web/profile.go b/internal/web/profile.go @@ -82,6 +82,12 @@ func (m *Module) profileGETHandler(c *gin.Context) { return } + // only allow search engines / robots to view this page if account is discoverable + var robotsMeta string + if account.Discoverable { + robotsMeta = robotsAllowSome + } + // we should only show the 'back to top' button if the // profile visitor is paging through statuses showBackToTop := false @@ -112,6 +118,7 @@ func (m *Module) profileGETHandler(c *gin.Context) { "instance": instance, "account": account, "ogMeta": ogBase(instance).withAccount(account), + "robotsMeta": robotsMeta, "statuses": statusResp.Items, "statuses_next": statusResp.NextLink, "show_back_to_top": showBackToTop, diff --git a/internal/web/robots.go b/internal/web/robots.go @@ -0,0 +1,24 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package web + +// https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#robotsmeta +const ( + robotsAllowSome = "nofollow, noarchive, nositelinkssearchbox, max-image-preview:standard" +) diff --git a/web/template/header.tmpl b/web/template/header.tmpl @@ -6,6 +6,7 @@ <meta charset="UTF-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <meta name="robots" content="{{ if .robotsMeta }}{{ .robotsMeta }}{{ else }}noindex, nofollow{{ end }}"> {{ if .ogMeta }}{{ if .ogMeta.Locale }}<meta name="og:locale" content="{{ .ogMeta.Locale }}"> {{ end }}<meta name="og:type" content="{{ .ogMeta.ResourceType }}"> <meta name="og:title" content="{{ .ogMeta.Title }}"> diff --git a/web/template/profile.tmpl b/web/template/profile.tmpl @@ -29,7 +29,7 @@ </div> <h2 id="recent">Latest public toots</h2> {{ if not .statuses }} - <div class="nothinghere">Nothing here!</div> + <div data-nosnippet class="nothinghere">Nothing here!</div> {{ else }} <div class="thread"> {{ range .statuses }} diff --git a/web/template/status.tmpl b/web/template/status.tmpl @@ -1,4 +1,4 @@ -<div class="contentgrid"> +<div data-nosnippet class="contentgrid"> <a href="{{.Account.URL}}" class="avatar"><img src="{{.Account.Avatar}}" alt=""></a> <a href="{{.Account.URL}}" class="displayname">{{if .Account.DisplayName}}{{emojify .Account.Emojis (escape .Account.DisplayName)}}{{else}}{{.Account.Username}}{{end}}</a> <a href="{{.Account.URL}}" class="username">@{{.Account.Acct}}</a> @@ -52,4 +52,4 @@ <div id="favorites"><i aria-label="Favorites" class="fa fa-star"></i> {{.FavouritesCount}}</div> </div> </div> -<a href="{{.URL}}" class="toot-link">View toot</a> -\ No newline at end of file +<a data-nosnippet href="{{.URL}}" class="toot-link">View toot</a> +\ No newline at end of file diff --git a/web/template/thread.tmpl b/web/template/thread.tmpl @@ -1,6 +1,6 @@ {{ template "header.tmpl" .}} <main> - <div 
class="thread"> + <div data-nosnippet class="thread"> {{range .context.Ancestors}} <div class="toot"> {{ template "status.tmpl" .}}