Commit 5ab287f25cfe991a4bfac3113f38e4e9caa13f16
0 parents
Exists in
master
First commit
Showing
6 changed files
with
364 additions
and
0 deletions
Show diff stats
1 | +++ a/conda_env_nc2cdf.yml | |
... | ... | @@ -0,0 +1,38 @@ |
1 | +name: nc2cdf | |
2 | +channels: | |
3 | +- defaults | |
4 | +- http://conda.binstar.org/roipoussiere | |
5 | +dependencies: | |
6 | +- curl=7.49.0=1 | |
7 | +- hdf4=4.2.11=0 | |
8 | +- hdf5=1.8.17=1 | |
9 | +- jpeg=9b=0 | |
10 | +- libnetcdf=4.4.1=0 | |
11 | +- mkl=2017.0.1=0 | |
12 | +- netcdf4=1.2.4=np111py36_0 | |
13 | +- numpy=1.11.3=py36_0 | |
14 | +- openssl=1.0.2k=0 | |
15 | +- pip=9.0.1=py36_1 | |
16 | +- python=3.6.0=0 | |
17 | +- readline=6.2=2 | |
18 | +- setuptools=27.2.0=py36_0 | |
19 | +- sqlite=3.13.0=0 | |
20 | +- tk=8.5.18=0 | |
21 | +- wheel=0.29.0=py36_0 | |
22 | +- xz=5.2.2=1 | |
23 | +- zlib=1.2.8=3 | |
24 | +- pip: | |
25 | + - cycler==0.10.0 | |
26 | + - decorator==4.0.11 | |
27 | + - ffnet==0.8.3 | |
28 | + - h5py==2.6.0 | |
29 | + - matplotlib==2.0.0 | |
30 | + - networkx==1.11 | |
31 | + - pyparsing==2.1.10 | |
32 | + - python-dateutil==2.6.0 | |
33 | + - pytz==2016.10 | |
34 | + - scipy==0.18.1 | |
35 | + - six==1.10.0 | |
36 | + - spacepy==0.1.6 | |
37 | +prefix: /home/nathanael/.anaconda3/envs/nc2cdf | |
38 | + | ... | ... |
No preview for this file type
No preview for this file type
1 | +++ a/nc2cdf.py | |
... | ... | @@ -0,0 +1,210 @@ |
1 | +#!/usr/bin/env python | |
2 | +# -*- coding: utf-8 -*- | |
3 | + | |
4 | +# Resources: | |
5 | +# - http://pythonhosted.org/SpacePy/pycdf.html | |
6 | +# - http://unidata.github.io/netcdf4-python/ | |
7 | + | |
8 | +import os | |
9 | +import os.path as op | |
10 | +from datetime import datetime | |
11 | + | |
12 | +os.environ['SPACEPY'] = '/tmp' | |
13 | + | |
14 | +import netCDF4 | |
15 | +from spacepy import pycdf | |
16 | + | |
17 | +AMDA_DICT_PATH = './amda.json' | |
18 | +TEMP_FILE = '/tmp' | |
19 | + | |
20 | + | |
def error(error_message, e=None):
    """Log `error_message`, print it on stderr, then exit the program.

    - `error_message`: description of the failure.
    - `e`: optional exception (or any object) whose string form is appended
      to the message as a detail line.

    The record is appended to `converter.log` in the `TEMP_FILE` directory.
    The function never returns: it always ends with `sys.exit(1)`.
    """

    import sys

    detail = (':\n %s\n' % e) if e is not None else '\n'

    # Logging is best effort: an unwritable log must not hide the real error.
    try:
        with open(op.join(TEMP_FILE, 'converter.log'), 'a') as f_log:
            # One record per call, newline-terminated so that successive
            # records do not run into each other.
            f_log.write('With arguments ' + ', '.join(sys.argv) + '\n')
            f_log.write(error_message + detail)
    except OSError:
        pass

    sys.stderr.write(error_message + detail)
    sys.exit(1)
32 | + | |
33 | + | |
class NetCdf:
    """Open a NetCDF file (optionally gzip-compressed) and convert it to CDF.

    The constructor opens the input with `netCDF4.Dataset`; `get_cdf()` then
    builds a `pycdf.CDF` file carrying the same global attributes and
    variables. Failures are reported through `error()`, which exits the
    program.
    """

    SPASE_DATE_FORMAT = '%Y%j%H%M%S'  # ex: 2016238000000

    def __init__(self, input_path):
        """Open the NetCDF file located at `input_path`.

        - `input_path`: path of a `.nc` file, or of a `.nc.gz` file which is
          first extracted to a temporary `.nc` file.
        """

        self.cdf_path = None
        self.cdf = None
        self.temp_nc = None
        self.netcdf_path = None
        self.netcdf = None

        extension = self._extension(input_path)
        if extension == 'nc.gz':
            self.unzip(input_path)
        elif extension == 'nc':
            self.netcdf_path = input_path
        else:
            error('Unknown file extension "%s".' % extension)

        try:
            self.netcdf = netCDF4.Dataset(self.netcdf_path)
        except Exception as e:
            error('Can not open netCDF file %s' % self.netcdf_path, e)
        finally:
            # Remove the extracted copy even when opening failed (error()
            # exits through SystemExit, which still runs this clause).
            if self.temp_nc:
                os.remove(self.temp_nc)

    @staticmethod
    def _extension(path):
        """Return what follows the first dot of the file name of `path`, or '' if it has no dot."""

        return op.basename(path).partition('.')[2]

    def unzip(self, ncgz_input_path):
        """Extract the gzip file `ncgz_input_path` to a temporary `.nc` file.

        On success both `self.temp_nc` and `self.netcdf_path` are set to the
        path of the extracted file.
        """

        import gzip
        from shutil import copyfileobj

        netcdf_temp_path = self.build_temp_path(ncgz_input_path, '.nc')

        if not op.exists(ncgz_input_path):
            error('Compressed Net-CDF file is not found in "%s".' % ncgz_input_path)

        try:
            with gzip.open(ncgz_input_path, 'rb') as f_in, open(netcdf_temp_path, 'wb') as f_out:
                copyfileobj(f_in, f_out)
        except Exception as e:
            # Do not leave a truncated extraction behind; cleanup is best effort.
            try:
                if op.exists(netcdf_temp_path):
                    os.remove(netcdf_temp_path)
            except OSError:
                pass
            error('Can not unzip compressed Net-CDF from %s to %s' % (ncgz_input_path, netcdf_temp_path), e)

        self.temp_nc = netcdf_temp_path
        self.netcdf_path = netcdf_temp_path

    def parse_date(self, str_date):
        """Parse the string `str_date` and return the matching `datetime`.

        Characters 0-3 are the year, 4-6 the day number and 7-12 the time
        (HHMMSS); anything after the 13th character is ignored. The day
        number is incremented by one before being parsed as `%j`
        (NOTE(review): presumably the input counts days from 0 - confirm).
        """

        # Keep the day on three digits: without the padding '005' would become
        # '6' and `%j` would then swallow the first digit of the hour.
        day_of_year = '%03d' % (int(str_date[4:7]) + 1)
        return datetime.strptime(str_date[:4] + day_of_year + str_date[7:13], self.SPASE_DATE_FORMAT)

    # NetCDF methods

    def get_netcdf_path(self):
        """Return the path of the NetCDF file that was opened."""

        return self.netcdf_path

    def get_netcdf(self):
        """Return the `netCDF4.Dataset` object."""

        return self.netcdf

    def describe(self):
        """Print every variable of the NetCDF file, then its global attributes."""

        def describe_var(var):
            """Print type, dimensions, size and attributes of one NetCDF variable."""

            print('== %s ==' % var.name)
            print(' - numpy type: %s ^ %s ' % (str(var.dtype), str(var.ndim)))
            print(' - dimension(s): %s' % ', '.join(list(var.dimensions)))
            print(' - size: %s = %d' % ('x'.join([str(n) for n in var.shape]), var.size))
            # One-dimensional char arrays are shown as the string they spell.
            if var.ndim == 1 and str(var.dtype) == '|S1':
                print(' - values: \'%s\', ...' % ''.join([c.decode('utf-8') for c in var[:]]))

            if var.ncattrs():
                print(' - Attributes:')
                for var_attr_name in var.ncattrs():
                    print(' - %s: %s' % (var_attr_name, getattr(var, var_attr_name)))

        for variable in self.netcdf.variables.values():
            describe_var(variable)

        print('== Global attributes ==')
        for global_attr_name in self.netcdf.ncattrs():
            print(' - %s: %s' % (global_attr_name, getattr(self.netcdf, global_attr_name)))

    # CDF methods

    @staticmethod
    def create_new_cdf(cdf_path):
        """
        Create a new empty CDF file in `cdf_path`, return a pyCDF object.

        The parent directory is created when missing and a file already
        present at `cdf_path` is removed first.
        """

        # Create and clean a new directory for the CDF file.
        cdf_dir = op.dirname(cdf_path)
        # `cdf_dir` is empty for a bare file name: nothing to create then.
        if cdf_dir and not op.exists(cdf_dir):
            try:
                os.makedirs(cdf_dir)
            except OSError as e:
                error('Can not create directory %s' % cdf_dir, e)
        if op.exists(cdf_path):
            try:
                os.remove(cdf_path)
            except OSError as e:
                error('A CDF file already exist in %s and it can not be removed' % cdf_path, e)

        # Make the pyCDF object
        try:
            return pycdf.CDF(cdf_path, '')
        except pycdf.CDFError as e:
            error('Can not create CDF file on %s, check that the directory exists and its writing access' % cdf_path, e)

    @staticmethod
    def build_temp_path(path, extension):
        """Return the absolute path, inside `TEMP_FILE`, of the file name of `path`
        cut at its first dot and suffixed with `extension`."""

        return op.abspath(op.join(TEMP_FILE, op.basename(path).split('.')[0] + extension))

    def get_cdf(self, cdf_path=None):
        """
        Convert and return the CDF object (only return it if it's already converted)
        - `cdf_path`: The path of the CDF file (needed for CDF creation), or a temp path if not specified.

        Global attributes are copied as CDF global attributes. Char ('|S1')
        variables are turned into strings: a 1-dimension one becomes a global
        attribute, a 2-dimensions one becomes a CDF variable; when their last
        dimension is named 'TimeLength' the strings are parsed as dates.
        Other variables are copied along with their attributes.
        """

        # Compare with None instead of relying on the truthiness of the CDF object.
        if self.cdf is not None:
            return self.cdf

        self.cdf_path = op.abspath(cdf_path) if cdf_path else self.build_temp_path(self.netcdf_path, '.cdf')
        self.cdf = self.create_new_cdf(self.cdf_path)

        for global_attr_name in self.netcdf.ncattrs():
            self.cdf.attrs[global_attr_name] = getattr(self.netcdf, global_attr_name)

        for key, var in self.netcdf.variables.items():
            dimensions = list(var.dimensions)

            if str(var.dtype) == '|S1':
                if len(dimensions) == 1:
                    var_str = str(netCDF4.chartostring(var[:]).astype(str, copy=False))
                    self.cdf.attrs[key] = self.parse_date(var_str) if dimensions[0] == 'TimeLength' else var_str
                elif len(dimensions) == 2:
                    var_strs = [str(s) for s in netCDF4.chartostring(var[:]).astype(str, copy=False)]
                    if dimensions[1] == 'TimeLength':
                        self.cdf[key] = [self.parse_date(s) for s in var_strs]
                    else:
                        self.cdf[key] = var_strs
            else:
                self.cdf[key] = var[:]
                for var_attr_name in var.ncattrs():
                    self.cdf[key].attrs[var_attr_name] = getattr(var, var_attr_name)

        return self.cdf

    def get_cdf_path(self):
        """Return the path of the CDF file, or None before `get_cdf()` was called."""

        return self.cdf_path
186 | + | |
if __name__ == '__main__':
    # Command-line entry point:
    #   <script> input                convert, store in a temp path, print it
    #   <script> -i input             describe the NetCDF file
    #   <script> input output.cdf     convert and store in the given path
    import sys

    args = sys.argv[1:]

    # A lone `-i` has no file to describe: show the usage as well.
    if len(args) not in (1, 2) or args == ['-i']:
        print('usage:')
        print('- `%s path/to/file.nc.gz` # Convert a Net-CDF file, '
              'save it in a temp directory, then display its path.' % sys.argv[0])
        print('- `%s -i path/to/file.nc.gz` # Display information about a Net-CDF file.' % sys.argv[0])
        print('- `%s path/to/file.nc.gz path/to/file.cdf` # Convert a Net-CDF file '
              'and save it in the specified path.' % sys.argv[0])
        print('This script can also be used as a Python library.')
        # `sys.exit` rather than the `exit` helper, which only exists when the
        # `site` module is loaded.
        sys.exit(1)

    if len(args) == 1:
        netcdf = NetCdf(args[0])
        netcdf.get_cdf()
        print('File stored in "%s".' % netcdf.get_cdf_path())
    elif args[0] == '-i':
        netcdf = NetCdf(args[1])
        netcdf.describe()
    else:
        netcdf = NetCdf(args[0])
        netcdf.get_cdf(args[1])
1 | +++ a/pip_req_nc2cdf.txt | |
... | ... | @@ -0,0 +1,16 @@ |
1 | +appdirs==1.4.0 | |
2 | +cycler==0.10.0 | |
3 | +decorator==4.0.11 | |
4 | +functools32==3.2.3.post2 | |
5 | +h5py==2.6.0 | |
6 | +matplotlib==2.0.0 | |
7 | +netCDF4==1.2.7 | |
8 | +networkx==1.11 | |
9 | +numpy==1.12.0 | |
10 | +packaging==16.8 | |
11 | +pyparsing==2.1.10 | |
12 | +python-dateutil==2.6.0 | |
13 | +pytz==2016.10 | |
14 | +scipy==0.18.1 | |
15 | +six==1.10.0 | |
16 | +subprocess32==3.2.7 | ... | ... |
1 | +++ a/readme.md | |
... | ... | @@ -0,0 +1,100 @@ |
1 | +# CDF tools | |
2 | + | |
3 | +## NetCDF to CDF converter | |
4 | + | |
5 | +- File: [nc2cdf.py](./nc2cdf.py) | |
6 | +- Python interpreter: 3.6 | |
7 | + | |
8 | +### CLI usage | |
9 | + | |
10 | +#### Converting a Net-CDF file: | |
11 | + | |
12 | +Convert the NetCDF file, save it in a temp directory, then display its path: | |
13 | + | |
14 | + ./nc2cdf.py path/to/input_file.nc.gz | |
15 | + | |
16 | +Convert a Net-CDF file and save it in the specified path. | |
17 | + | |
18 | + ./nc2cdf.py path/to/input_file.nc.gz path/to/output_file.cdf | |
19 | + | |
20 | +**Note:** If the specified input file is a gzip archive, it will be automatically extracted before the conversion. | |
21 | + | |
22 | +#### Describing a NetCDF file: | |
23 | + | |
24 | + ./nc2cdf.py -i path/to/file.nc.gz | |
25 | + | |
26 | +This displays information about a Net-CDF file (such as global attributes and variables information). | |
27 | + | |
28 | +### Python library usage | |
29 | + | |
30 | + from nc2cdf import NetCdf | |
31 | + | |
32 | + netcdf = NetCdf('path/to/input_file.nc.gz') | |
33 | + | |
34 | + netcdf.describe() | |
35 | + | |
36 | + netcdf.get_cdf() | |
37 | + print('CDF path: ' + netcdf.get_cdf_path()) | |
38 | + | |
39 | + netcdf.get_cdf('path/to/output_file.cdf') | |
40 | + | |
41 | +## About NetCDF4 and pycdf | |
42 | + | |
43 | +### NetCDF4 | |
44 | + | |
45 | +[NetCDF](https://www.unidata.ucar.edu/software/netcdf/) is a C library to read and edit NetCDF files. | |
46 | + | |
47 | +[NetCDF4](https://github.com/Unidata/netcdf4-python) is a Python wrapper for NetCDF, which requires the NetCDF library; it is used here to read NetCDF files. | |
48 | + | |
49 | +Documentation is available [here](http://unidata.github.io/netcdf4-python/). | |
50 | + | |
51 | +### pycdf | |
52 | + | |
53 | +[SpacePy](http://pythonhosted.org/SpacePy/index.html) is a python package for space sciences, used here to write CDF files. | |
54 | + | |
55 | +Documentation of the package spacepy.pycdf is available [here](http://pythonhosted.org/SpacePy/pycdf.html). | |
56 | + | |
57 | +### Install the Python environment and dependencies | |
58 | + | |
59 | +We will install dependencies in Python environments. | |
60 | + | |
61 | +#### If you have NetCDF installed on your machine | |
62 | + | |
63 | +You can use pip and virtualenv: | |
64 | + | |
65 | + pip install virtualenv | |
66 | + virtualenv -p python3 nc2cdf | |
67 | + source nc2cdf/bin/activate # Or ". nc2cdf/bin/activate.fish" on Fish terms | |
68 | + pip install -r pip_req_nc2cdf.txt | |
69 | + | |
70 | +#### If you don't have NetCDF installed on your machine | |
71 | + | |
72 | +The easiest way is to use [Anaconda](https://docs.continuum.io/), which is a tool to install compiled Python dependencies in environments. | |
73 | + | |
74 | +1. [Install Anaconda3](https://docs.continuum.io/anaconda/install). | |
75 | +2. Edit your system startup file: | |
76 | + | |
77 | +I recommend adding an alias which sets the Python path. This way the Anaconda Python will not be used by default and you can easily deal with multiple Anaconda versions. | |
78 | + | |
79 | +At the end of your `~/.bashrc`: | |
80 | + | |
81 | + alias conda3='export PATH="$HOME/.anaconda3/bin:$PATH"' | |
82 | + | |
83 | +Or on Fish terms (`~/.config/omf/init.fish`): | |
84 | + | |
85 | + alias conda3="set PATH $HOME/.anaconda3/bin/ $PATH; and source $HOME/.anaconda3/etc/fish/conf.d/conda.fish" | |
86 | + | |
87 | +3. Create the environment | |
88 | + | |
89 | +conda3 | |
90 | +conda env create -f conda_env_nc2cdf.yml | |
91 | + | |
92 | +4. Activate the Conda environment: | |
93 | + | |
94 | +source activate nc2cdf # or only "activate nc2cdf" on Fish terms | |
95 | + | |
96 | +### Licence | |
97 | + | |
98 | +- License: [GPLv3](https://www.gnu.org/licenses/gpl-3.0.html); | |
99 | +- Credits: CNRS/IRAP; | |
100 | +- contact: nathanael.jourdane@irap.omp.eu | ... | ... |