require 'socket'
require 'cgi'

# Client-side proxy for a remote scraping session.
#
# Every command is written to a TCP socket as one newline-terminated line and
# the reply is read back from the same socket. Replies are recognised by their
# three-character prefix: ERROR_PREFIX marks an error, DATARECORD_PREFIX a
# single data record, DATASET_PREFIX a data set; anything else is returned as
# plain text.
class RemoteScrapingSession
  # Command verbs understood by the remote server.
  CMD_NEW_SCRAPINGSESSION = "SSNEW".freeze
  CMD_DISCONNECT = "DISCONNECT".freeze
  CMD_SETVARIABLE = "SETVAR".freeze
  CMD_GETVARIABLE = "GETVAR".freeze
  CMD_GET_DATARECORD = "GETDR".freeze
  CMD_GET_NUM_DATARECORDS_IN_DATASET = "GETDRDS".freeze
  CMD_SCRAPE = "SCRAPE".freeze
  CMD_SET_LAZYSCRAPE = "SETLAZYSCRAPE".freeze

  # Separators used when building a command line.
  COMMAND_DELIM = "#".freeze
  PARAM_DELIM = "~!~".freeze

  # Prefixes that identify the kind of reply.
  DATARECORD_PREFIX = "DR#".freeze
  DATASET_PREFIX = "DS#".freeze
  ERROR_PREFIX = "ER#".freeze

  # Byte value of the character that terminates a reply (line feed).
  END_OF_LINE_CHAR = 10
  # Default number of bytes requested per socket read.
  DEFAULT_BUFFER_SIZE = 1024

  # Opens the connection and asks the server to start the named session.
  # Connection failures raised by TCPSocket.new propagate to the caller.
  #
  # @param name The name of the scraping session to instantiate.
  # @param host The host the session should connect to. The default is localhost.
  # @param port The port the session should connect to. The default is 8778.
  def initialize(name, host = "localhost", port = 8778)
    @is_error = false
    @error_message = nil
    @buffer_size = DEFAULT_BUFFER_SIZE
    @version = "1.0"
    @socket = TCPSocket.new(host, port)
    relayCommand(CMD_NEW_SCRAPINGSESSION + COMMAND_DELIM + name)
  end

  # Tells the remote scraping session to scrape.
  def scrape
    relayCommand(CMD_SCRAPE + COMMAND_DELIM)
  end

  # Gets the value of a variable from the remote scraping session.
  #
  # @param var_name The name of the desired variable.
  # @return The value of the variable.
  def getVariable(var_name)
    relayCommand(CMD_GETVARIABLE + COMMAND_DELIM + var_name)
  end

  # Gets the number of data records found in a data set.
  #
  # @param data_set_name The name of the data set.
  # @return The server's reply, as sent (a String, not converted to Integer).
  def getNumDataRecordsInDataSet(data_set_name)
    relayCommand(CMD_GET_NUM_DATARECORDS_IN_DATASET + COMMAND_DELIM + data_set_name)
  end

  # Requests one data record of a data set.
  #
  # @param data_set_name The name of the data set.
  # @param index The position of the record; sent to the server via to_s.
  # @return The parsed reply (a Hash when the server answers with a data record).
  def getDataRecordFromDataSet(data_set_name, index)
    relayCommand(CMD_GET_DATARECORD + COMMAND_DELIM + data_set_name + PARAM_DELIM + index.to_s)
  end

  # Sets a session variable on the remote scraping session.
  #
  # @param var_name The name of the variable to be set.
  # @param value The value to be set; sent to the server via to_s.
  def setVariable(var_name, value)
    relayCommand(CMD_SETVARIABLE + COMMAND_DELIM + var_name + PARAM_DELIM + value.to_s)
  end

  # Sets the size of the buffer used when reading data from the server.
  #
  # @param buffer_size The size of the buffer, in bytes.
  def setBufferSize(buffer_size)
    @buffer_size = buffer_size
  end

  # Resets the buffer size to its default.
  def resetBufferSize
    @buffer_size = DEFAULT_BUFFER_SIZE
  end

  # Sends a command to the remote server and interprets the reply.
  #
  # @param commandText The text of the command to send (without the newline).
  # @return A Hash for a data-record reply, an Array of Hashes for a data-set
  #   reply, the error message for an error reply (which also sets the error
  #   flag), or the whitespace-stripped reply text otherwise.
  def relayCommand(commandText)
    @socket.write(commandText + "\n")
    response = read_response

    if response.start_with?(ERROR_PREFIX)
      @is_error = true
      # Skip the whole three-character prefix and drop the line terminator.
      @error_message = response[ERROR_PREFIX.length..-1].strip
    elsif response.start_with?(DATARECORD_PREFIX)
      # The trailing newline is framing, not part of the last field's value.
      dataRecordStringToAssociativeArray(response[DATARECORD_PREFIX.length..-1].chomp)
    elsif response.start_with?(DATASET_PREFIX)
      dataSetStringToArrayOfAssociativeArrays(response[DATASET_PREFIX.length..-1])
    else
      response.strip
    end
  end

  # Parses a data-record string of the form "key=value&key=value" into a Hash.
  # Values are CGI-unescaped; keys are used as received. A field without "="
  # becomes a key with an empty value, and empty fields are ignored.
  #
  # @param data_record_string The string containing the data record information.
  # @return The Hash of field names to values, or nil when given nil.
  def dataRecordStringToAssociativeArray(data_record_string)
    return nil if data_record_string.nil?

    data_record_string.split("&").each_with_object({}) do |field, record|
      next if field.empty?

      key, _separator, raw_value = field.partition("=")
      record[key] = CGI.unescape(raw_value)
    end
  end

  # Parses a data-set string into an Array of Hashes, one per data record.
  # Records are separated by whitespace, as in the original implementation.
  #
  # @param data_set_string The string containing the data set information.
  # @return The Array of Hashes (empty when there are no records).
  def dataSetStringToArrayOfAssociativeArrays(data_set_string)
    data_set_string.to_s.split.map do |record_string|
      dataRecordStringToAssociativeArray(record_string)
    end
  end

  # Indicates whether an error reply has been received from the server.
  # The flag is never cleared once set.
  #
  # @return true or false.
  def isError
    @is_error
  end

  # Returns the last error message returned from the server, if any.
  #
  # @return The error message, or nil when no error reply has been received.
  def getErrorMessage
    @error_message
  end

  # Disconnects from the remote server by closing the socket.
  def disconnect
    @socket.close
  end

  # Indicates whether or not a lazy scrape should be done on the session.
  #
  # @param doLazyScrape Truthy to send "true", falsy to send "false".
  def setDoLazyScrape(doLazyScrape)
    relayCommand(CMD_SET_LAZYSCRAPE + COMMAND_DELIM + (doLazyScrape ? "true" : "false"))
  end

  # Gets the version string set when this object was constructed.
  def getVersion
    @version
  end

  private

  # Reads one reply from the socket.
  #
  # As in the original code, further chunks are requested only when the first
  # chunk fills the buffer; reading then continues until the reply ends with
  # the end-of-line character or the peer stops sending data.
  # NOTE(review): a short reply split across several TCP segments would still
  # be truncated by this rule - confirm against the server's behaviour.
  def read_response
    terminator = END_OF_LINE_CHAR.chr
    response = @socket.recv(@buffer_size).to_s
    return response if response.length < @buffer_size

    until response.end_with?(terminator)
      chunk = @socket.recv(@buffer_size)
      # An empty read means the connection was closed; stop instead of spinning.
      break if chunk.nil? || chunk.empty?

      response += chunk
    end
    response
  end
end