Skip to content

Commit

Permalink
Merge branch 'main' of github.com:bluesky-social/indigo into combo-ha…
Browse files Browse the repository at this point in the history
…shtag-rule
  • Loading branch information
foysalit committed Sep 9, 2024
2 parents 97110ad + 58e6cb4 commit d5ecb24
Show file tree
Hide file tree
Showing 46 changed files with 1,252 additions and 141 deletions.
3 changes: 3 additions & 0 deletions automod/rules/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/bluesky-social/indigo/automod"
)

// IMPORTANT: reminder that these are the indigo-edition rules, not production rules
func DefaultRules() automod.RuleSet {
rules := automod.RuleSet{
PostRules: []automod.PostRuleFunc{
Expand All @@ -20,13 +21,15 @@ func DefaultRules() automod.RuleSet {
ReplySingleBadWordPostRule,
AggressivePromotionRule,
IdenticalReplyPostRule,
//IdenticalReplyPostSameParentRule,
DistinctMentionsRule,
YoungAccountDistinctMentionsRule,
MisleadingLinkUnicodeReversalPostRule,
SimpleBotPostRule,
HarassmentTargetInteractionPostRule,
HarassmentTrivialPostRule,
NostrSpamPostRule,
TrivialSpamPostRule,
},
ProfileRules: []automod.ProfileRuleFunc{
GtubeProfileRule,
Expand Down
9 changes: 6 additions & 3 deletions automod/rules/interaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
)

var interactionDailyThreshold = 800
var followsDailyThreshold = 3000

var _ automod.RecordRuleFunc = InteractionChurnRule

Expand All @@ -26,6 +27,7 @@ func InteractionChurnRule(c *automod.RecordContext) error {
c.AddAccountFlag("high-like-churn")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("interaction churn: %d likes, %d unlikes today (so far)", created, deleted))
c.Notify("slack")
return nil
}
case "app.bsky.graph.follow":
c.Increment("follow", did)
Expand All @@ -37,14 +39,15 @@ func InteractionChurnRule(c *automod.RecordContext) error {
c.AddAccountFlag("high-follow-churn")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("interaction churn: %d follows, %d unfollows today (so far)", created, deleted))
c.Notify("slack")
return nil
}
// just generic bulk following
followRatio := float64(c.Account.FollowersCount) / float64(c.Account.FollowsCount)
if created > interactionDailyThreshold && c.Account.FollowsCount > 2000 && followRatio < 0.2 {
if created > followsDailyThreshold {
c.Logger.Info("bulk-follower", "created-today", created)
c.AddAccountFlag("bulk-follower")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("bulk following: %d follows today (so far)", created))
//c.Notify("slack")
c.Notify("slack")
return nil
}
}
return nil
Expand Down
34 changes: 34 additions & 0 deletions automod/rules/quick.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,15 @@ func BotLinkProfileRule(c *automod.RecordContext, profile *appbsky.ActorProfile)
c.AddAccountLabel("spam")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on link in profile: %s", str))
c.Notify("slack")
return nil
}
}
if strings.Contains(*profile.Description, "🏈🍕🌀") {
c.AddAccountFlag("profile-bot-string")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on string in profile"))
c.Notify("slack")
return nil
}
}
return nil
}
Expand All @@ -38,6 +45,7 @@ func SimpleBotPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
c.AddAccountFlag("post-bot-string")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on string in post: %s", str))
c.Notify("slack")
return nil
}
}
return nil
Expand All @@ -55,7 +63,33 @@ func NewAccountBotEmailRule(c *automod.AccountContext) error {
c.AddAccountFlag("new-suspicious-email")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on email domain TLD: %s", tld))
c.Notify("slack")
return nil
}
}
return nil
}

var _ automod.PostRuleFunc = TrivialSpamPostRule

// looks for new accounts, which frequently post the same type of content
func TrivialSpamPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 8*24*time.Hour) {
return nil
}

// only posts with dumb patterns (for now)
txt := strings.ToLower(post.Text)
if !c.InSet("trivial-spam-text", txt) {
return nil
}

// only accounts with empty profile (for now)
if c.Account.Profile.HasAvatar {
return nil
}

c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("trivial spam account (also labeled; remove label if this isn't spam!)"))
c.AddAccountLabel("!hide")
c.Notify("slack")
return nil
}
51 changes: 48 additions & 3 deletions automod/rules/replies.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ func ReplyCountPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error
}

// triggers on the N+1 post
var identicalReplyLimit = 6
// var identicalReplyLimit = 6
// TODO: bumping temporarily
var identicalReplyLimit = 20
var identicalReplyActionLimit = 75

var _ automod.PostRuleFunc = IdenticalReplyPostRule

Expand Down Expand Up @@ -69,14 +72,56 @@ func IdenticalReplyPostRule(c *automod.RecordContext, post *appbsky.FeedPost) er
count := c.GetCount("reply-text", bucket, period)
if count >= identicalReplyLimit {
c.AddAccountFlag("multi-identical-reply")
c.ReportAccount(automod.ReportReasonRude, fmt.Sprintf("possible spam (new account, %d identical reply-posts today)", count))
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible spam (new account, %d identical reply-posts today)", count))
c.Notify("slack")
}
if count >= identicalReplyActionLimit && utf8.RuneCountInString(post.Text) > 100 {
c.ReportAccount(automod.ReportReasonRude, fmt.Sprintf("likely spam/harassment (new account, %d identical reply-posts today), actioned (remove label urgently if account is ok)", count))
c.AddAccountLabel("!warn")
c.Notify("slack")
}

return nil
}

// Similar to above rule but only counts replies to the same post. More aggressively applies a spam label to new accounts that are less than a day old.
var identicalReplySameParentLimit = 3
var identicalReplySameParentMaxAge = 24 * time.Hour
var identicalReplySameParentMaxPosts int64 = 50
var _ automod.PostRuleFunc = IdenticalReplyPostSameParentRule

func IdenticalReplyPostSameParentRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
if post.Reply == nil || IsSelfThread(c, post) {
return nil
}

if ParentOrRootIsFollower(c, post) {
return nil
}

postCount := c.Account.PostsCount
if AccountIsOlderThan(&c.AccountContext, identicalReplySameParentMaxAge) || postCount >= identicalReplySameParentMaxPosts {
return nil
}

period := countstore.PeriodHour
bucket := c.Account.Identity.DID.String() + "/" + post.Reply.Parent.Uri + "/" + HashOfString(post.Text)
c.IncrementPeriod("reply-text-same-post", bucket, period)

count := c.GetCount("reply-text-same-post", bucket, period)
if count >= identicalReplySameParentLimit {
c.AddAccountFlag("multi-identical-reply-same-post")
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible spam (%d identical reply-posts to same post today)", count))
c.AddAccountLabel("spam")
c.Notify("slack")
}

return nil
}

var youngReplyAccountLimit = 12
// TODO: bumping temporarily
// var youngReplyAccountLimit = 12
var youngReplyAccountLimit = 200
var _ automod.PostRuleFunc = YoungAccountDistinctRepliesRule

func YoungAccountDistinctRepliesRule(c *automod.RecordContext, post *appbsky.FeedPost) error {
Expand Down
4 changes: 3 additions & 1 deletion automod/rules/replies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,7 @@ func TestIdenticalReplyPostRule(t *testing.T) {
assert.NoError(capture.ProcessCaptureRules(&eng, cap))
f, err := eng.Flags.Get(ctx, did)
assert.NoError(err)
assert.Equal([]string{"multi-identical-reply"}, f)
// TODO: tweaked threshold, disabling for now
_ = f
//assert.Equal([]string{"multi-identical-reply"}, f)
}
4 changes: 3 additions & 1 deletion automod/visual/hiveai_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ func summarizeSexualLabels(cl []HiveAIResp_Class) string {
// then finally flag remaining "underwear" images in to sexually suggestive
// (after non-sexual content already labeled above)
for _, underwearClass := range []string{"yes_male_underwear", "yes_female_underwear"} {
if scores[underwearClass] >= threshold {
// TODO: experimenting with higher threshhold during traffic spike
//if scores[underwearClass] >= threshold {
if scores[underwearClass] >= 0.98 {
return "sexual"
}
}
Expand Down
29 changes: 19 additions & 10 deletions automod/visual/hiveai_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package visual

import (
"strings"
"time"

"github.com/bluesky-social/indigo/automod"
"github.com/bluesky-social/indigo/automod/rules"
lexutil "github.com/bluesky-social/indigo/lex/util"
)

Expand All @@ -13,13 +15,14 @@ func (hal *HiveAIClient) HiveLabelBlobRule(c *automod.RecordContext, blob lexuti
return nil
}

var psclabel string
var prescreenResult string
if hal.PreScreenClient != nil {
val, err := hal.PreScreenClient.PreScreenImage(c.Ctx, data)
if err != nil {
c.Logger.Info("prescreen-request-error", "err", err)
} else {
psclabel = val
prescreenResult = val
c.Logger.Info("prescreen-request", "uri", c.RecordOp.ATURI(), "result", prescreenResult)
}
}

Expand All @@ -28,18 +31,24 @@ func (hal *HiveAIClient) HiveLabelBlobRule(c *automod.RecordContext, blob lexuti
return err
}

if psclabel == "sfw" {
if len(labels) > 0 {
c.Logger.Info("prescreen-safe-failure", "uri", c.RecordOp.ATURI())
} else {
c.Logger.Info("prescreen-safe-success", "uri", c.RecordOp.ATURI())
if hal.PreScreenClient != nil {
if prescreenResult == "sfw" {
if len(labels) > 0 {
c.Logger.Info("prescreen-safe-failure", "uri", c.RecordOp.ATURI(), "labels", labels, "result", prescreenResult)
} else {
c.Logger.Info("prescreen-safe-success", "uri", c.RecordOp.ATURI())
}
}
} else {
c.Logger.Info("prescreen-nsfw", "uri", c.RecordOp.ATURI())
}

for _, l := range labels {
c.AddRecordLabel(l)
// NOTE: experimenting with profile reporting for new accounts
if l == "sexual" && c.RecordOp.Collection.String() == "app.bsky.actor.profile" && rules.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) {
c.ReportRecord(automod.ReportReasonSexual, "possible sexual profile (not labeled yet)")
c.Logger.Info("skipping record label", "label", l, "reason", "sexual-profile-experiment")
} else {
c.AddRecordLabel(l)
}
}

return nil
Expand Down
2 changes: 1 addition & 1 deletion backfill/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ func (bf *Backfiller) HandleEvent(ctx context.Context, evt *atproto.SyncSubscrib
return fmt.Errorf("failed to read event repo: %w", err)
}

var ops []*BufferedOp
ops := make([]*BufferedOp, 0, len(evt.Ops))
for _, op := range evt.Ops {
kind := repomgr.EventKind(op.Action)
switch kind {
Expand Down
41 changes: 5 additions & 36 deletions bgs/bgs.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,6 @@ func (bgs *BGS) EventsHandler(c echo.Context) error {

logger.Infow("new consumer", "cursor", since)

header := events.EventHeader{Op: events.EvtKindMessage}
for {
select {
case evt, ok := <-evts:
Expand All @@ -655,42 +654,12 @@ func (bgs *BGS) EventsHandler(c echo.Context) error {
return err
}

var obj lexutil.CBOR

switch {
case evt.Error != nil:
header.Op = events.EvtKindErrorFrame
obj = evt.Error
case evt.RepoCommit != nil:
header.MsgType = "#commit"
obj = evt.RepoCommit
case evt.RepoHandle != nil:
header.MsgType = "#handle"
obj = evt.RepoHandle
case evt.RepoIdentity != nil:
header.MsgType = "#identity"
obj = evt.RepoIdentity
case evt.RepoAccount != nil:
header.MsgType = "#account"
obj = evt.RepoAccount
case evt.RepoInfo != nil:
header.MsgType = "#info"
obj = evt.RepoInfo
case evt.RepoMigrate != nil:
header.MsgType = "#migrate"
obj = evt.RepoMigrate
case evt.RepoTombstone != nil:
header.MsgType = "#tombstone"
obj = evt.RepoTombstone
default:
return fmt.Errorf("unrecognized event kind")
if evt.Preserialized != nil {
_, err = wc.Write(evt.Preserialized)
} else {
err = evt.Serialize(wc)
}

if err := header.MarshalCBOR(wc); err != nil {
return fmt.Errorf("failed to write header: %w", err)
}

if err := obj.MarshalCBOR(wc); err != nil {
if err != nil {
return fmt.Errorf("failed to write event: %w", err)
}

Expand Down
Loading

0 comments on commit d5ecb24

Please sign in to comment.