  1. #1
    New to the CF scene
    Join Date: Dec 2011 | Posts: 1 | Thanks: 0 | Thanked 0 Times in 0 Posts

    htaccess redirect help

    Hi,

    I want to redirect:
    http://mysite.com/mysite.com/
    to
    http://mysite.com

    Google is crawling my website and has found more than 400 URLs returning 404 Not Found, all of which start with the doubled domain shown above.

    Here is my .htaccess:
    Code:
    RewriteEngine On
    
    # Block out any script trying to set a mosConfig value through the URL
    RewriteCond %{QUERY_STRING} mosConfig_[a-zA-Z_]{1,21}(=|\%3D) [OR]
    # Block out any script trying to base64_encode data within the URL
    RewriteCond %{QUERY_STRING} base64_encode[^(]*\([^)]*\) [OR]
    # Block out any script that includes a <script> tag in URL
    RewriteCond %{QUERY_STRING} (<|%3C)([^s]*s)+cript.*(>|%3E) [NC,OR]
    # Block out any script trying to set a PHP GLOBALS variable via URL
    RewriteCond %{QUERY_STRING} GLOBALS(=|\[|\%[0-9A-Z]{0,2}) [OR]
    # Block out any script trying to modify a _REQUEST variable via URL
    RewriteCond %{QUERY_STRING} _REQUEST(=|\[|\%[0-9A-Z]{0,2})
    # Return 403 Forbidden header and show the content of the root homepage
    RewriteRule .* index.php [F]
    #
    ########## End - Rewrite rules to block out some common exploits
    
    ########## Begin - Custom redirects
    #
    # If you need to redirect some pages, or set a canonical non-www to
    # www redirect (or vice versa), place that code here. Ensure those
    # redirects use the correct RewriteRule syntax and the [R=301,L] flags.
    #
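    # As an illustration only (not part of the original file): a canonical
    # non-www to www redirect of the kind described above would look like the
    # commented-out lines below, with example.com as a placeholder domain.
    # RewriteCond %{HTTP_HOST} ^example\.com$ [NC]
    # RewriteRule ^(.*)$ http://www.example.com/$1 [R=301,L]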
    ########## End - Custom redirects
    
    #
    RewriteRule .* - [E=HTTP_AUTHORIZATION:%{HTTP:Authorization}]
    #
    # If the requested path and file is not /index.php and the request
    # has not already been internally rewritten to the index.php script
    RewriteCond %{REQUEST_URI} !^/index\.php
    # and the request is for root, or for an extensionless URL, or the
    # requested URL ends with one of the listed extensions
    RewriteCond %{REQUEST_URI} (/[^.]*|\.(php|html?|feed|pdf|raw))$ [NC]
    # and the requested path and file doesn't directly match a physical file
    RewriteCond %{REQUEST_FILENAME} !-f
    # and the requested path and file doesn't directly match a physical folder
    RewriteCond %{REQUEST_FILENAME} !-d
    # internally rewrite the request to the index.php script
    RewriteRule .* index.php [L]
    #
    ########## End - 
    SetEnvIf Request_URI ".*configuration\.php" blocktheaccess
    Order deny,allow
    Deny from env=blocktheaccess
    ########## Block bad user agents
     RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Bot\ mailto:craftbot@yahoo.com [OR]
     RewriteCond %{HTTP_USER_AGENT} ^ChinaClaw [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Custo [OR]
     RewriteCond %{HTTP_USER_AGENT} ^DISCo [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Download\ Demon [OR]
     RewriteCond %{HTTP_USER_AGENT} ^eCatch [OR]
     RewriteCond %{HTTP_USER_AGENT} ^EirGrabber [OR]
     RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon [OR]
     RewriteCond %{HTTP_USER_AGENT} ^EmailWolf [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Express\ WebPictures [OR]
     RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro [OR]
     RewriteCond %{HTTP_USER_AGENT} ^EyeNetIE [OR]
     RewriteCond %{HTTP_USER_AGENT} ^FlashGet [OR]
     RewriteCond %{HTTP_USER_AGENT} ^GetRight [OR]
     RewriteCond %{HTTP_USER_AGENT} ^GetWeb! [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Go-Ahead-Got-It [OR]
     RewriteCond %{HTTP_USER_AGENT} ^GrabNet [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Grafula [OR]
     RewriteCond %{HTTP_USER_AGENT} ^HMView [OR]
     RewriteCond %{HTTP_USER_AGENT} HTTrack [NC,OR]
     RewriteCond %{HTTP_USER_AGENT} ^Image\ Stripper [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Image\ Sucker [OR]
     RewriteCond %{HTTP_USER_AGENT} Indy\ Library [NC,OR]
     RewriteCond %{HTTP_USER_AGENT} ^InterGET [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Internet\ Ninja [OR]
     RewriteCond %{HTTP_USER_AGENT} ^JetCar [OR]
     RewriteCond %{HTTP_USER_AGENT} ^JOC\ Web\ Spider [OR]
     RewriteCond %{HTTP_USER_AGENT} ^larbin [OR]
     RewriteCond %{HTTP_USER_AGENT} ^LeechFTP [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Mass\ Downloader [OR]
     RewriteCond %{HTTP_USER_AGENT} ^MIDown\ tool [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Mister\ PiX [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Navroad [OR]
     RewriteCond %{HTTP_USER_AGENT} ^NearSite [OR]
     RewriteCond %{HTTP_USER_AGENT} ^NetAnts [OR]
     RewriteCond %{HTTP_USER_AGENT} ^NetSpider [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire [OR]
     RewriteCond %{HTTP_USER_AGENT} ^NetZIP [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Octopus [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Offline\ Explorer [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Offline\ Navigator [OR]
     RewriteCond %{HTTP_USER_AGENT} ^PageGrabber [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Papa\ Foto [OR]
     RewriteCond %{HTTP_USER_AGENT} ^pavuk [OR]
     RewriteCond %{HTTP_USER_AGENT} ^pcBrowser [OR]
     RewriteCond %{HTTP_USER_AGENT} ^RealDownload [OR]
     RewriteCond %{HTTP_USER_AGENT} ^ReGet [OR]
     RewriteCond %{HTTP_USER_AGENT} ^SiteSnagger [OR]
     RewriteCond %{HTTP_USER_AGENT} ^SmartDownload [OR]
     RewriteCond %{HTTP_USER_AGENT} ^SuperBot [OR]
     RewriteCond %{HTTP_USER_AGENT} ^SuperHTTP [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Surfbot [OR]
     RewriteCond %{HTTP_USER_AGENT} ^tAkeOut [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Teleport\ Pro [OR]
     RewriteCond %{HTTP_USER_AGENT} ^VoidEYE [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Web\ Image\ Collector [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Web\ Sucker [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebAuto [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebCopier [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebFetch [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebGo\ IS [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebLeacher [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebReaper [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebSauger [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Website\ eXtractor [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Website\ Quester [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebStripper [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebWhacker [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WebZIP [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Widow [OR]
     RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [OR]
     RewriteCond %{HTTP_USER_AGENT} ^Zeus
     RewriteRule ^.* - [F,L]
     ##########
     RewriteCond %{HTTP_REFERER}    ^(.*)(<|>|'|%0A|%0D|%27|%3C|%3E|%00).* [NC,OR]
     RewriteCond %{HTTP_COOKIE}     ^.*(<|>|'|%0A|%0D|%27|%3C|%3E|%00).* [NC,OR]
     RewriteCond %{REQUEST_URI}     ^/(,|;|:|<|>|">|"<|/|\\\.\.\\).{0,9999}.* [NC,OR]

     RewriteCond %{HTTP_USER_AGENT} ^$ [OR]
     RewriteCond %{HTTP_USER_AGENT} ^(java|curl|wget).* [NC,OR]
     RewriteCond %{HTTP_USER_AGENT} ^.*(winhttp|HTTrack|clshttp|archiver|loader|email|harvest|extract|grab|miner).* [NC,OR]
     RewriteCond %{HTTP_USER_AGENT} ^.*(libwww-perl|curl|wget|python|nikto|scan).* [NC,OR]
     RewriteCond %{HTTP_USER_AGENT} ^.*(<|>|'|%0A|%0D|%27|%3C|%3E|%00).* [NC,OR]

     RewriteCond %{QUERY_STRING}    ^.*(;|<|>|'|"|\)|%0A|%0D|%22|%27|%3C|%3E|%00).*(/\*|union|select|insert|cast|set|declare|drop|update|md5|benchmark).* [NC,OR]
     RewriteCond %{QUERY_STRING}    ^.*(localhost|loopback|127\.0\.0\.1).* [NC,OR]
     RewriteCond %{QUERY_STRING}    ^.*\.[A-Za-z0-9].* [NC,OR]
     RewriteCond %{QUERY_STRING}    ^.*(<|>|'|%0A|%0D|%27|%3C|%3E|%00).* [NC]
     # The conditions above only take effect when followed by a RewriteRule,
     # so deny the request when any of them matches
     RewriteRule .* - [F]
     ##########
    Last edited by payymon; 12-04-2011 at 08:48 AM.

  • #2
    New Coder
    Join Date: Oct 2011 | Location: San Francisco, CA | Posts: 22 | Thanks: 1 | Thanked 1 Time in 1 Post
    This is not something that can be handled only by redirecting the folder to the root. You also have to send Google a directory removal request in Google Webmaster Central. First, block the site.com directory through the robots.txt file with the code below:

    Code:
    User-Agent: *
    Disallow: /site.com/
    Then do a directory removal request through GWMT. After a few more crawls Google will remove all the URLs under the site.com directory and your 404s will go down.
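    On top of the robots.txt block, you can also 301 the doubled URLs back to the root in .htaccess, so visitors and crawlers that still hit them land on a real page instead of a 404. This is only a rough sketch, assuming the stray prefix is literally /mysite.com/ (swap in the real domain) and that the rule sits after RewriteEngine On and before the Joomla SEF rules:
    Code:
    RewriteRule ^mysite\.com(/.*)?$ http://mysite.com/ [R=301,L]
    If you would rather keep the rest of the path instead of sending everything to the homepage, redirect to http://mysite.com$1 instead (the capture already includes the leading slash).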

