POST multiple files using Django test client in same request

I am trying to build some tests for uploads on my Django site. The site allows multiple files to be uploaded in a single request, so I need to test the multi-file case.
Testing one file works great:
from django.test import Client

def test_stuff(self):
    myfiles = open('....\file.csv', 'r')
    c = Client()
    response = c.post('/', {'file': myfiles})
But trying it with a list of files doesn't work.
def test_stuff(self):
    file_list = # get list of file paths to open
    myfiles = []
    for file in file_list:
        myfiles.append(open('....\file.csv', 'r'))
    c = Client()
    response = c.post('/', {'file': myfiles})
And neither does:
def test_stuff(self):
    file_list = # get list of file paths to open
    myfiles = []
    for file in file_list:
        myfiles.append(open('....\file.csv', 'r'))
    c = Client()
    response = c.post('/', files={'file': myfiles})
or
def test_stuff(self):
    file_list = # get list of file paths to open
    myfiles = []
    for file in file_list:
        myfiles.append(open('....\file.csv', 'r'))
    c = Client()
    response = c.post('/', files=myfiles)
My view gets the files from request.POST.get('myfiles'), but request.FILES is empty.
Is there a way to POST multiple files with the Django test client, or should I use something else?
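For what it's worth, a minimal sketch of one approach that may work: the Django test client encodes a list value as repeated form fields, so a list of open file objects under a single key can be POSTed in one request; the view should then read them with request.FILES.getlist() rather than request.POST.get(). The key name and paths below are placeholders, not taken from the question:

from django.test import Client

def test_multiple_files(self):
    paths = ['first.csv', 'second.csv']  # hypothetical paths
    myfiles = [open(p, 'rb') for p in paths]
    try:
        c = Client()
        # the list value becomes several 'file' fields in a single POST
        response = c.post('/', {'file': myfiles})
    finally:
        for f in myfiles:
            f.close()

In the view, multiple uploads under the same key are then available via request.FILES.getlist('file').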

Related

How to serve file with cherrypy and delete the file from server?

I need to create a file on the server, serve it to the client, and I would prefer to delete the file from the server afterwards.
Here's what I'm currently trying:
def myaction():
    fs = facts.generatefacts(int(low), int(high), int(amount), op)
    filename = 'test.txt'
    FILE = open(filename, 'w')
    FILE.writelines('my\nstuff\nhere')
    FILE.close()
    RETURN_FILE = open(filename, 'r')
    return serve_fileobj(RETURN_FILE, disposition='attachment',
                         content_type='.txt', name=filename)
myaction.exposed = True
There are several things about this that I don't like. I wouldn't think that I would have to open the file twice, for example. I would expect that there is a way to write content directly to the response object, without ever creating a file object, but that isn't my question today.
The above code accomplishes what I want, but leaves a file behind. If I delete the file before returning the response, then (of course) the file isn't found.
Is there a way to remove this file as soon as it has been served?
I'm coming from the Java world, so I'm a bit disoriented, and any other suggestions to improve the above are appreciated.
1) You can move the file to a temporary folder and periodically delete any files older than, say, half an hour.
2) You can try:
result = serve_fileobj(RETURN_FILE, disposition='attachment',
                       content_type='.txt', name=filename)
os.unlink(filename)
return result
3) Try a StringIO object, which wraps a string so it can be read like a file; see the sketch below.
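As an illustration of option 3, here is a minimal, untested sketch using an in-memory buffer with CherryPy's serve_fileobj(), so no file is ever written to disk (io.BytesIO is used here; on Python 2, StringIO works similarly):

from io import BytesIO
from cherrypy.lib.static import serve_fileobj

def myaction():
    # build the response body entirely in memory instead of on disk
    buf = BytesIO(b'my\nstuff\nhere')
    return serve_fileobj(buf, disposition='attachment',
                         content_type='text/plain', name='test.txt')
myaction.exposed = True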
The solution below uses weak references to clean up temporary files (and in this case temporary directories) once the file has been fully sent by CherryPy.
I've used temporary directories, since that allows for processes that may create several files before sending the final result (e.g. building up a zipped Excel file), but for simple cases a single temporary file would work fine.
import cherrypy
from cherrypy import expose
import zipfile
import weakref
import shutil
import os
import tempfile

PARENT_TEMP_DATA_DIR = '/tmp/cherrypy_data_files'

def single_file_zip(filepath):
    'Given a filepath, creates a zip archive in the same directory, with just the single file inside'
    filename = os.path.basename(filepath)
    zipname = '%s.zip' % os.path.splitext(filename)[0]
    if filename.lower() == zipname.lower():
        raise ValueError("Can't use .zip file as source")
    zippath = os.path.join(os.path.dirname(filepath), zipname)
    zf = zipfile.ZipFile(zippath, mode='w', compression=zipfile.ZIP_DEFLATED)
    zf.write(filepath, filename)
    zf.close()
    return zippath

class DataFiles(object):
    def __init__(self):
        self.weak_references = {}

    def cleanup(self, wr):
        if wr in self.weak_references:
            filepath = self.weak_references[wr]
            if os.path.isfile(filepath):
                try:
                    os.remove(filepath)
                except Exception:
                    pass
            if os.path.isdir(filepath):
                shutil.rmtree(filepath, ignore_errors=True)
            self.weak_references.pop(wr)

    @expose
    def index(self):
        tempdir = os.path.abspath(tempfile.mkdtemp(dir=PARENT_TEMP_DATA_DIR))
        txt_path = os.path.join(tempdir, 'my_data.txt')
        with open(txt_path, 'wb') as fh:
            fh.write('lots of data here\n')
        zip_path = single_file_zip(txt_path)
        os.remove(txt_path)  # not strictly needed, as the cleanup routine would remove this anyway
        result = cherrypy.lib.static.serve_download(zip_path)
        # the weak reference allows automatic cleanup of the temp dir once the file is served
        wr = weakref.ref(result, self.cleanup)
        self.weak_references[wr] = tempdir
        return result

if __name__ == '__main__':
    # on startup, clean up any prior temporary data
    tempdir_parent = PARENT_TEMP_DATA_DIR
    print 'Clearing %s' % tempdir_parent
    if not os.path.exists(tempdir_parent):
        os.mkdir(tempdir_parent)
    for filename in os.listdir(tempdir_parent):
        filepath = os.path.join(tempdir_parent, filename)
        if os.path.isfile(filepath):
            print 'Deleting file %s' % filepath
            os.remove(filepath)
        if os.path.isdir(filepath):
            print 'Removing directory %s and all contents' % filepath
            shutil.rmtree(filepath, ignore_errors=True)
    # start CherryPy
    cherrypy.quickstart(DataFiles())
This worked for me:
class Handler:
    ...
    def download_complete(self):
        os.unlink(cherrypy.request.zipFileName)

    def download(self, path):
        zipFileName = createZip(path)
        cherrypy.request.zipFileName = zipFileName
        cherrypy.request.hooks.attach('on_end_request', self.download_complete)
        return cherrypy.lib.static.serve_download(zipFileName)

Stream files to kafka using airflow

What is the best approach to stream CSV files to a Kafka topic using Airflow? Should I write a custom operator for Airflow?
Probably best to use the PythonOperator to process the files line by line. I have a use case where I poll an SFTP server for files and, when I find some, I process them line by line, writing the results out as JSON. I do things like parse dates into YYYY-MM-DD format, etc. Something like this might work for you:
import csv

def csv_file_to_kafka(**context):
    f = '/path/to/downloaded/csv_file.csv'
    csvfile = open(f, 'r')
    reader = csv.DictReader(csvfile)
    for row in reader:
        """
        Send the row to Kafka
        """
    return

csv_file_to_kafka = PythonOperator(
    task_id='csv_file_to_kafka',
    python_callable=csv_file_to_kafka,
    dag=dag
)
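For the "Send the row to Kafka" placeholder, a minimal, untested sketch using the kafka-python package might look like the following; the broker address, topic name, and file path are assumptions for illustration, not part of the original answer:

import csv
import json

from kafka import KafkaProducer

def send_csv_to_kafka():
    # broker and topic are hypothetical; adjust for your cluster
    producer = KafkaProducer(
        bootstrap_servers='localhost:9092',
        value_serializer=lambda v: json.dumps(v).encode('utf-8')
    )
    with open('/path/to/downloaded/csv_file.csv', 'r') as csvfile:
        for row in csv.DictReader(csvfile):
            producer.send('csv_rows', dict(row))  # one JSON message per CSV row
    producer.flush()  # block until all buffered messages are delivered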
Now it's really up to you how you get the files to download. In my case, I use the SSHHook and the GoogleCloudStorageHook to get files from an SFTP server, and then pass the names of the files to a task that parses and cleans the CSV files. I do this by pulling the files down from SFTP and putting them into Google Cloud Storage:
"""
HOOKS: Connections to external systems
"""
def sftp_connection():
"""
Returns an SFTP connection created using the SSHHook
"""
ssh_hook = SSHHook(ssh_conn_id='sftp_connection')
ssh_client = ssh_hook.get_conn()
return ssh_client.open_sftp()
def gcs_connection():
"""
Returns an GCP connection created using the GoogleCloudStorageHook
"""
return GoogleCloudStorageHook(google_cloud_storage_conn_id='my_gcs_connection')
"""
PYTHON CALLABLES: Called by PythonOperators
"""
def get_files(**context):
"""
Looks at all files on the FTP server and returns a list files.
"""
sftp_client = sftp_connection()
all_files = sftp_client.listdir('/path/to/files/')
files = []
for f in all_files:
files.append(f)
return files
def save_files(**context):
"""
Looks to see if a file already exists in GCS. If not, the file is downloaed
from SFTP server and uploaded to GCS. A list of
"""
files = context['task_instance'].xcom_pull(task_ids='get_files')
sftp_client = sftp_connection()
gcs = gcs_connection()
new_files = []
new_outcomes_files = []
new_si_files = []
new_files = process_sftp_files(files, gcs, sftp_client)
return new_files
def csv_file_to_kafka(**context):
"""
Untested sample parse csv files and send to kafka
"""
files = context['task_instance'].xcom_pull(task_ids='save_files')
for f in new_files:
csvfile = open(f, 'r')
reader = csv.DictReader(csvfile)
for row in reader:
"""
Send the row to Kafka
"""
return
get_files = PythonOperator(
task_id='get_files',
python_callable=get_files,
dag=dag
)
save_files = PythonOperator(
task_id='save_files',
python_callable=save_files,
dag=dag
)
csv_file_to_kafka = PythonOperator(
task_id='csv_file_to_kafka',
python_callable=csv_file_to_kafka,
dag=dag
)
I know I could do this all in one big Python callable; that's how I'm refactoring the code now, so that a single callable polls the SFTP server, pulls the latest files, and parses them according to my rules. I've heard that using XCom isn't ideal; supposedly Airflow tasks aren't meant to communicate with each other too much.
Depending on your use case, you might even want to explore something like Apache NiFi; I'm actually looking into that now too.

cherry py auto download file

I'm currently building a CherryPy app for my projects, and in one handler I need a file download to start automatically. After the zip file finishes generating, I want the download to the client to start, so that after the images are created they are zipped and sent to the client:
class Process(object):
    exposed = True

    def GET(self, id, norm_all=True, format_ramp=None):
        ...
        def content():  # generating images
            ...
        def zipdir(basedir, archivename):
            assert os.path.isdir(basedir)
            with closing(ZipFile(archivename, "w", ZIP_DEFLATED)) as z:
                for root, dirs, files in os.walk(basedir):
                    # NOTE: ignore empty directories
                    for fn in files:
                        absfn = os.path.join(root, fn)
                        zfn = absfn[len(basedir)+len(os.sep):]  # XXX: relative path
                        z.write(absfn, zfn)
        zipdir("/data/images/8", "8.zip")
        # after the zip file finishes generating, I want to start the download
        # to the client (so after the images are created, they are zipped and
        # sent), and I'm thinking of doing it here, but I don't know how
        return content()
    GET._cp_config = {'response.stream': True}

    def POST(self):
        global proc
        global processing
        proc.kill()
        processing = False
Just create the zip archive in memory and then return it using the file_generator() helper function from cherrypy.lib. You can also yield the HTTP response body to enable streaming (keep in mind that the HTTP headers must be set before you start yielding).
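As a hedged illustration of the streaming variant (the handler and chunk contents below are made up for the sketch, not taken from the answer that follows):

import cherrypy

class StreamingDownload:
    @cherrypy.expose
    def archive(self):
        # headers must be set before the first chunk is yielded
        cherrypy.response.headers['Content-Type'] = 'application/octet-stream'
        cherrypy.response.headers['Content-Disposition'] = 'attachment; filename="data.bin"'
        def body():
            # placeholder chunks; in practice, yield pieces of the generated file
            for chunk in (b'first chunk\n', b'second chunk\n'):
                yield chunk
        return body()
    archive._cp_config = {'response.stream': True}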
I wrote a simple example for you (based on your snippet) that just returns the whole buffered zip archive.
import os
from contextlib import closing
from io import BytesIO
from zipfile import ZipFile, ZIP_DEFLATED

import cherrypy
from cherrypy.lib import file_generator

class GenerateZip:
    @cherrypy.expose
    def archive(self, filename):
        zip_archive = BytesIO()
        with closing(ZipFile(zip_archive, "w", ZIP_DEFLATED)) as z:
            # basedir is assumed to be defined elsewhere
            # (e.g. "/data/images/8" from your snippet)
            for root, dirs, files in os.walk(basedir):
                # NOTE: ignore empty directories
                for fn in files:
                    absfn = os.path.join(root, fn)
                    zfn = absfn[len(basedir)+len(os.sep):]  # XXX: relative path
                    z.write(absfn, zfn)
        zip_archive.seek(0)  # rewind so the generator reads from the start
        cherrypy.response.headers['Content-Type'] = (
            'application/zip'
        )
        cherrypy.response.headers['Content-Disposition'] = (
            'attachment; filename={fname}.zip'.format(
                fname=filename
            )
        )
        return file_generator(zip_archive)
N.B. I didn't test this specific piece of code, but the general idea is correct.
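For completeness, a hypothetical way to mount and run the handler above (not part of the original answer):

import cherrypy

if __name__ == '__main__':
    cherrypy.quickstart(GenerateZip())
    # then request e.g. http://127.0.0.1:8080/archive?filename=images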

How do I fix this file_tracker that reads/writes using JSON dictionaries?

I am trying to write a script that tracks changes made to the directories/files under several file paths created by an installer. I found Thomas Sileo's DirTools project on git and modified it, but I am now running into some issues when writing/reading JSON:
1) First, I believe that I am writing to JSON incorrectly: my create_state() function only writes out the last path I need.
2) If I do get it working, I am unable to read/parse the file like I could before; I usually get ValueError: Extra data errors.
Code below:
import os
import json
import getpass

files = []
subdirs = []

USER = getpass.getuser()

pathMac = ['/Applications/',
           '/Users/' + USER + '/Documents/']

def create_dir_index(path):
    files = []
    subdirs = []
    for root, dirs, filenames in os.walk(path):
        for subdir in dirs:
            subdirs.append(os.path.relpath(os.path.join(root, subdir), path))
        for f in filenames:
            files.append(os.path.relpath(os.path.join(root, f), path))
    return dict(files=files, subdirs=subdirs)

def create_state():
    for count in xrange(len(pathMac)):
        dir_state = create_dir_index(pathMac[count])
        out_file = open("Manifest.json", "w")
        json.dump(dir_state, out_file)
        out_file.close()

def compare_states(dir_base, dir_cmp):
    '''
    Return a comparison of two manifest json files
    '''
    data = {}
    data['deleted'] = list(set(dir_cmp['files']) - set(dir_base['files']))
    data['created'] = list(set(dir_base['files']) - set(dir_cmp['files']))
    data['deleted_dirs'] = list(set(dir_cmp['subdirs']) - set(dir_base['subdirs']))
    data['created_dirs'] = list(set(dir_base['subdirs']) - set(dir_cmp['subdirs']))
    return data

if __name__ == '__main__':
    response = raw_input("Would you like to Compare or Create? ")
    if response == "Create":
        # CREATE MANIFEST json file
        create_state()
        print "Manifest file created."
    elif response == "Compare":
        # create the CURRENT state of all indexes in pathMac and write to json file
        for count in xrange(len(pathMac)):
            dir_state = create_dir_index(pathMac[count])
            out_file = open("CurrentState.json", "w")
            json.dump(dir_state, out_file)
            out_file.close()
        # Open and Load the contents from the file into dictionaries
        manifest = json.load(open("Manifest.json", "r"))
        current = json.load(open("CurrentState.json", "r"))
        print compare_states(current, manifest)
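A minimal sketch of one likely fix for both points, assuming the structure above: have create_state() build a single dict keyed by path and dump it once, so every path is kept and Manifest.json holds exactly one JSON document (several json.dump calls into the same file are what typically cause ValueError: Extra data on json.load):

def create_state():
    # index every path, then write one JSON document in a single dump
    state = {}
    for path in pathMac:
        state[path] = create_dir_index(path)
    with open("Manifest.json", "w") as out_file:
        json.dump(state, out_file)

compare_states() would then be called once per path, e.g. compare_states(current[path], manifest[path]).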

Dropbox upload file by python error

I'm trying to upload a list of files to Dropbox, but I get various kinds of errors. I tried this, and tried everything on the net, but I still can't get it to work.
# Include the Dropbox SDK libraries
from dropbox import rest, session
import webbrowser
import os
import glob
import zipfile
import datetime
from dropbox import client

# Get your app key and secret from the Dropbox developer website
# (app keys defined here)
# ACCESS_TYPE should be 'dropbox' or 'app_folder' as configured for your app
ACCESS_TYPE = 'app_folder'

sess = session.DropboxSession(APP_KEY, APP_SECRET, ACCESS_TYPE)
request_token = sess.obtain_request_token()
url = sess.build_authorize_url(request_token)

file_list = []
# get the date
now = datetime.datetime.now()

def initialize():
    #print "url:", url
    # open a public URL, in this case, the webbrowser docs
    #url = "http://docs.python.org/library/webbrowser.html"
    webbrowser.open(url)
    print "Please click the 'Allow' button to Authorize..."
    print """
    Please select by entering the specific number...
    1 Backup all my files one by one
    2 Backup specific folder
    3 Backup specific file
    4 Get my account details
    5 About this software
    6 Exit
    """
    try:
        # This will fail if the user didn't visit the above URL and hit 'Allow'
        access_token = sess.obtain_access_token(request_token)
    except:
        print "An error has occurred"

def getAccountInfo():
    from dropbox import client
    client = client.DropboxClient(sess)
    account_info_dict = client.account_info()
    print "linked account:"
    for item in account_info_dict:
        if type(account_info_dict[item]) == dict:
            inner_dict = account_info_dict[item]
            for item1 in inner_dict:
                print item1, ":", inner_dict[item1]
        print item, ":", account_info_dict[item]

def getAllFiles():
    for dirname, dirnames, filenames in os.walk('I:/'):
        for subdirname in dirnames:
            print os.path.join(dirname, subdirname)
        for filename in filenames:
            file_name = os.path.join(dirname, filename)
            print file_name
            file_list.append(file_name)
    return file_list

def upload_one_by_one(sess):
    from dropbox import client
    files = getAllFiles()
    client = client.DropboxClient(sess)
    #zip_file_name = now.strftime("%Y%m%d%H%M") + ".zip"
    #z = zipfile.ZipFile(zip_file_name, "w")
    for file_item in files:
        #z.write(file_item)
        #f = open(zip_file_name)
        response = client.put_file("test/", file_item)
        print "uploaded:", response
        break

initialize()
#getAccountInfo()
upload_one_by_one(sess)
The problem is with the client.put_file() arguments. I am trying to upload an image file to the test folder, but it gets uploaded as a text file containing the path to the image.
Can't I upload image files to Dropbox via the Dropbox SDK?
It seems that you never opened the file you want to upload; this may work:
for file_item in files:
    f = open(file_item, 'rb')  # open in binary mode for non-text files
    response = client.put_file("test/", f)
put_file() takes either a file-like object or a string that represents the contents of a file. If you pass in a file name, it won't automatically read in the contents of the file.
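As a hedged extension of that answer: with the old v1 DropboxClient API, put_file() takes the full destination path, including the file name, as its first argument, so something like the following sketch may be closer to what's wanted (paths are illustrative):

import os

for file_item in files:
    with open(file_item, 'rb') as f:  # binary mode, so image bytes survive intact
        # destination path includes the file name, e.g. "test/photo.jpg"
        response = client.put_file('test/' + os.path.basename(file_item), f)
        print "uploaded:", response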
