//====== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======= // // Purpose: Implementation of China Government Censorship enforced on all user-generated strings // //============================================================================= // // Pre-compiled header (cbase.h in the game branch; none in Steam Client branch) // #include "cbase.h" // // Custom implementations for sharing code verbatim between Steam Client and the game branch // #ifdef CSTRIKE15 static bool BannedWords_LoadFileIntoBuffer( char const *szFilename, CUtlBuffer &buf ) { return g_pFullFileSystem->ReadFile( szFilename, "MOD", buf ); } #else #include "utlbuffer.h" #include "utlmap.h" #include "filesystem.h" #include "filesystem_helpers.h" #include "tier1/fileio.h" static bool BannedWords_LoadFileIntoBuffer( char const *szFilename, CUtlBuffer &buf ) { return LoadFileIntoBuffer( szFilename, buf, false ); } #endif #include "bannedwords.h" #include "utlmemory.h" #include "utlbuffer.h" // NOTE: This has to be the last file included! #include "tier0/memdbgon.h" ////////////////////////////////////////////////////////////////////////// // // Banned words dictionary // ////////////////////////////////////////////////////////////////////////// class CBannedWordsDictionary { public: ~CBannedWordsDictionary() { m_mapExternalStrings.PurgeAndDeleteElements(); } bool InitFromFile( char const *szFilename ); int CensorBannedWordsInplace( wchar_t *wsz ) const; char const * CensorExternalString( uint64 ullKey, char const *szExternalString ); public: CUtlBuffer m_buf; struct ExternalStringCache_t { char m_chExternalString[256]; char m_chCensoredString[256]; }; typedef CUtlMap< uint64, ExternalStringCache_t *, int, CDefLess< uint64 > > KeyStringMap_t; KeyStringMap_t m_mapExternalStrings; }; bool CBannedWordsDictionary::InitFromFile( char const *szFilename ) { if ( !BannedWords_LoadFileIntoBuffer( szFilename, m_buf ) ) return false; if ( m_buf.TellPut() % 2 ) return false; if ( m_buf.TellPut() <= 0x10000 ) return false; if ( V_memcmp( m_buf.Base(), "BDR1", 4 ) ) return false; return true; } int CBannedWordsDictionary::CensorBannedWordsInplace( wchar_t *wsz ) const { wchar_t const * const wszStartOfInput = wsz; int numReplaced = 0; for ( ; *wsz; ++ wsz ) { wchar_t wchThisLetter = *wsz; if ( ( wchThisLetter >= 'A' ) && ( wchThisLetter <= 'Z' ) ) wchThisLetter += 'a' - 'A'; // ensure input is also lowercase int32 nOffset = reinterpret_cast< int32 const * >( m_buf.Base() )[ wchThisLetter ]; if ( !nOffset ) continue; // no banned words start with this char for ( wchar_t const *pwchBan = ( wchar_t const * ) ( ( byte const * )( m_buf.Base() ) + nOffset ); pwchBan[1] == wchThisLetter; pwchBan += 1 + *pwchBan + 1 ) // (len wchar) + (actual number of wchars) + wnull-terminator { bool bWordBanned = true; bool bWordIsAllAlpha = true; { // if ( wcsncmp( wsz, pwchBan + 1, *pwchBan ) ) continue; wchar_t const *x = wsz; wchar_t const *y = pwchBan + 1; // dictionary word, compiled as lowercase for ( wchar_t numChecksRemaining = *pwchBan; numChecksRemaining -- > 0; ++ x, ++ y ) { wchar_t wchx = *x; if ( ( wchx >= 'A' ) && ( wchx <= 'Z' ) ) wchx += 'a' - 'A'; // ensure input is also lowercase if ( wchx != *y ) { bWordBanned = false; break; } if ( !( ( wchx >= 'a' ) && ( wchx <= 'z' ) ) ) bWordIsAllAlpha = false; } } if ( bWordBanned && bWordIsAllAlpha ) { bool bBannedSequenceStartsWord = ( ( wsz <= wszStartOfInput ) || ( wsz[ -1 ] >= 0xFF ) || !V_isalpha( wsz[ -1 ] ) ); bool bBannedSequenceEndsWord = ( !wsz[ *pwchBan ] || ( wsz[ *pwchBan ] >= 0xFF ) || !V_isalpha( wsz[ *pwchBan ] ) ); // Must match the full word, not substring in English word (otherwise banned words like // "ri", "mb", "sm" cause censoring all around) if ( *pwchBan < 4 ) bWordBanned = bBannedSequenceStartsWord && bBannedSequenceEndsWord; // Otherwise require that the banned word appears at the start or end of a word // so that it censored words like "bullshit" or "shitshow", but didn't censor // pro player name "pashaBiceps" containing "shabi" else bWordBanned = bBannedSequenceStartsWord || bBannedSequenceEndsWord; } if ( !bWordBanned ) continue; // BANNED WORD! for ( int kk = *pwchBan; kk -- > 0; ++ wsz ) { *wsz = L'*'; ++ numReplaced; } -- wsz; // already advanced by number of asterisks inserted (-1 because the loop will ++) break; } } return numReplaced; } char const * CBannedWordsDictionary::CensorExternalString( uint64 ullKey, char const *szExternalString ) { KeyStringMap_t::IndexType_t idx = m_mapExternalStrings.Find( ullKey ); if ( idx == m_mapExternalStrings.InvalidIndex() ) { ExternalStringCache_t *pNewEntry = new ExternalStringCache_t; V_memset( pNewEntry, 0, sizeof( ExternalStringCache_t ) ); idx = m_mapExternalStrings.InsertOrReplace( ullKey, pNewEntry ); } ExternalStringCache_t *pEntry = m_mapExternalStrings.Element( idx ); if ( V_strcmp( pEntry->m_chExternalString, szExternalString ) ) { V_strcpy_safe( pEntry->m_chExternalString, szExternalString ); wchar_t *wch = ( wchar_t * ) stackalloc( sizeof(pEntry->m_chExternalString)*sizeof( wchar_t ) ); V_UTF8ToUnicode( pEntry->m_chExternalString, wch, sizeof(pEntry->m_chExternalString)*sizeof( wchar_t ) ); if ( CensorBannedWordsInplace( wch ) ) V_UnicodeToUTF8( wch, pEntry->m_chCensoredString, sizeof( pEntry->m_chCensoredString ) ); else V_strcpy_safe( pEntry->m_chCensoredString, pEntry->m_chExternalString ); } return pEntry->m_chCensoredString; } ////////////////////////////////////////////////////////////////////////// // // Banned words interface exposed to clients // ////////////////////////////////////////////////////////////////////////// CBannedWords::~CBannedWords() { delete m_pDictionary; m_pDictionary = NULL; } bool CBannedWords::InitFromFile( char const *szFilename ) { CBannedWordsDictionary *pDictionary = new CBannedWordsDictionary; if ( pDictionary->InitFromFile( szFilename ) ) { delete m_pDictionary; m_pDictionary = pDictionary; return true; } else { delete pDictionary; return false; } } int CBannedWords::CensorBannedWordsInplace( wchar_t *wsz ) const { return ( m_pDictionary && wsz && *wsz ) ? m_pDictionary->CensorBannedWordsInplace( wsz ) : 0; } int CBannedWords::CensorBannedWordsInplace( char *sz ) const { if ( !m_pDictionary ) return 0; if ( !sz || !*sz ) return 0; int nLen = V_strlen( sz ); wchar_t *wch = ( wchar_t * ) stackalloc( ( 1 + nLen )*sizeof( wchar_t ) ); V_UTF8ToUnicode( sz, wch, ( 1 + nLen )*sizeof( wchar_t ) ); int numCensored = m_pDictionary->CensorBannedWordsInplace( wch ); if ( !numCensored ) return 0; V_UnicodeToUTF8( wch, sz, nLen + 1 ); return numCensored; } char const * CBannedWords::CensorExternalString( uint64 ullKey, char const *szExternalString ) const { return ( m_pDictionary && szExternalString && *szExternalString ) ? m_pDictionary->CensorExternalString( ullKey, szExternalString ) : szExternalString; } CBannedWords g_BannedWords;